From 225a32af72b6f9dac6adb35263201ec7a2a08901 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 26 Sep 2018 02:39:42 +0000 Subject: [PATCH] [ORC] Add support for multithreaded compiles to LLJIT and LLLazyJIT. LLJIT and LLLazyJIT can now be constructed with an optional NumCompileThreads argument. If this is non-zero then a thread pool will be created with the given number of threads, and compile tasks will be dispatched to the thread pool. To enable testing of this feature, two new flags are added to lli: (1) -compile-threads=N (N = 0 by default) controls the number of compile threads to use. (2) -thread-entry can be used to execute code on additional threads. For each -thread-entry argument supplied (multiple are allowed) a new thread will be created and the given symbol called. These additional thread entry points are called after static constructors are run, but before main. llvm-svn: 343058 --- .../llvm/ExecutionEngine/Orc/ExecutionUtils.h | 3 + llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h | 41 +++++++-- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 90 +++++++++++++++++-- .../OrcLazy/multiple-compile-threads-basic.ll | 18 ++++ llvm/tools/lli/lli.cpp | 54 +++++++++-- 5 files changed, 185 insertions(+), 21 deletions(-) create mode 100644 llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index f56b4031173e..6a9731655b77 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -67,6 +67,9 @@ public: SubtargetFeatures &getFeatures() { return Features; } TargetOptions &getOptions() { return Options; } + Triple& getTargetTriple() { return TT; } + const Triple& getTargetTriple() const { return TT; } + private: Triple TT; std::string Arch; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index 
57b991f8c751..4c840dae69f5 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -22,7 +22,7 @@ #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ThreadPool.h" namespace llvm { namespace orc { @@ -30,11 +30,19 @@ namespace orc { /// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT. class LLJIT { public: - /// Create an LLJIT instance. - static Expected> - Create(std::unique_ptr TM, DataLayout DL); - /// Returns a reference to the ExecutionSession for this JIT instance. + /// Destruct this instance. If a multi-threaded instance, waits for all + /// compile threads to complete. + ~LLJIT(); + + /// Create an LLJIT instance. + /// If NumCompileThreads is not equal to zero, creates a multi-threaded + /// LLJIT with the given number of compile threads. + static Expected> + Create(JITTargetMachineBuilder JTMB, DataLayout DL, + unsigned NumCompileThreads = 0); + + /// Returns the ExecutionSession for this instance. ExecutionSession &getExecutionSession() { return *ES; } /// Returns a reference to the JITDylib representing the JIT'd main program. @@ -91,9 +99,15 @@ public: RTDyldObjectLinkingLayer2 &getObjLinkingLayer() { return ObjLinkingLayer; } protected: + + /// Create an LLJIT instance with a single compile thread. LLJIT(std::unique_ptr ES, std::unique_ptr TM, DataLayout DL); + /// Create an LLJIT instance with multiple compile threads. 
+ LLJIT(std::unique_ptr ES, JITTargetMachineBuilder JTMB, + DataLayout DL, unsigned NumCompileThreads); + std::unique_ptr getMemoryManager(VModuleKey K); std::string mangle(StringRef UnmangledName); @@ -105,8 +119,8 @@ protected: std::unique_ptr ES; JITDylib &Main; - std::unique_ptr TM; DataLayout DL; + std::unique_ptr CompileThreads; RTDyldObjectLinkingLayer2 ObjLinkingLayer; IRCompileLayer2 CompileLayer; @@ -118,9 +132,13 @@ protected: /// compilation of LLVM IR. class LLLazyJIT : public LLJIT { public: + /// Create an LLLazyJIT instance. + /// If NumCompileThreads is not equal to zero, creates a multi-threaded + /// LLLazyJIT with the given number of compile threads. static Expected> - Create(std::unique_ptr TM, DataLayout DL); + Create(JITTargetMachineBuilder JTMB, DataLayout DL, + unsigned NumCompileThreads = 0); /// Set an IR transform (e.g. pass manager pipeline) to run on each function /// when it is compiled. @@ -137,11 +155,20 @@ public: } private: + + // Create a single-threaded LLLazyJIT instance. LLLazyJIT(std::unique_ptr ES, std::unique_ptr TM, DataLayout DL, std::unique_ptr CCMgr, std::function()> ISMBuilder); + // Create a multi-threaded LLLazyJIT instance. + LLLazyJIT(std::unique_ptr ES, + JITTargetMachineBuilder JTMB, DataLayout DL, + unsigned NumCompileThreads, + std::unique_ptr CCMgr, + std::function()> ISMBuilder); + std::unique_ptr CCMgr; std::function()> ISMBuilder; diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index c79c47a0e336..ecdfd862c3b7 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -12,13 +12,45 @@ #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/Mangler.h" +namespace { + + // A SimpleCompiler that owns its TargetMachine. 
+ class TMOwningSimpleCompiler : public llvm::orc::SimpleCompiler { + public: + TMOwningSimpleCompiler(std::unique_ptr TM) + : llvm::orc::SimpleCompiler(*TM), TM(std::move(TM)) {} + private: + // FIXME: shared because std::functions (and thus + // IRCompileLayer2::CompileFunction) are not moveable. + std::shared_ptr TM; + }; + +} // end anonymous namespace + namespace llvm { namespace orc { +LLJIT::~LLJIT() { + if (CompileThreads) + CompileThreads->wait(); +} + Expected> -LLJIT::Create(std::unique_ptr TM, DataLayout DL) { +LLJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL, + unsigned NumCompileThreads) { + + if (NumCompileThreads == 0) { + // If NumCompileThreads == 0 then create a single-threaded LLJIT instance. + auto TM = JTMB.createTargetMachine(); + if (!TM) + return TM.takeError(); + return std::unique_ptr(new LLJIT(llvm::make_unique(), + std::move(*TM), std::move(DL))); + } + return std::unique_ptr(new LLJIT(llvm::make_unique(), - std::move(TM), std::move(DL))); + std::move(JTMB), std::move(DL), + NumCompileThreads)); } Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) { @@ -52,12 +84,35 @@ Expected LLJIT::lookupLinkerMangled(JITDylib &JD, LLJIT::LLJIT(std::unique_ptr ES, std::unique_ptr TM, DataLayout DL) : ES(std::move(ES)), Main(this->ES->createJITDylib("main")), - TM(std::move(TM)), DL(std::move(DL)), + DL(std::move(DL)), ObjLinkingLayer(*this->ES, [this](VModuleKey K) { return getMemoryManager(K); }), - CompileLayer(*this->ES, ObjLinkingLayer, SimpleCompiler(*this->TM)), + CompileLayer(*this->ES, ObjLinkingLayer, TMOwningSimpleCompiler(std::move(TM))), CtorRunner(Main), DtorRunner(Main) {} +LLJIT::LLJIT(std::unique_ptr ES, + JITTargetMachineBuilder JTMB, DataLayout DL, + unsigned NumCompileThreads) + : ES(std::move(ES)), Main(this->ES->createJITDylib("main")), + DL(std::move(DL)), + ObjLinkingLayer(*this->ES, + [this](VModuleKey K) { return getMemoryManager(K); }), + CompileLayer(*this->ES, ObjLinkingLayer, 
MultiThreadedSimpleCompiler(std::move(JTMB))), + CtorRunner(Main), DtorRunner(Main) { + assert(NumCompileThreads != 0 && + "Multithreaded LLJIT instance can not be created with 0 threads"); + + CompileThreads = llvm::make_unique(NumCompileThreads); + this->ES->setDispatchMaterialization([this](JITDylib &JD, std::unique_ptr MU) { + // FIXME: Switch to move capture once we have c++14. + auto SharedMU = std::shared_ptr(std::move(MU)); + auto Work = [SharedMU, &JD]() { + SharedMU->doMaterialize(JD); + }; + CompileThreads->async(std::move(Work)); + }); +} + std::unique_ptr LLJIT::getMemoryManager(VModuleKey K) { return llvm::make_unique(); @@ -90,10 +145,11 @@ void LLJIT::recordCtorDtors(Module &M) { } Expected> -LLLazyJIT::Create(std::unique_ptr TM, DataLayout DL) { + LLLazyJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL, + unsigned NumCompileThreads) { auto ES = llvm::make_unique(); - const Triple &TT = TM->getTargetTriple(); + const Triple &TT = JTMB.getTargetTriple(); auto CCMgr = createLocalCompileCallbackManager(TT, *ES, 0); if (!CCMgr) @@ -107,9 +163,18 @@ LLLazyJIT::Create(std::unique_ptr TM, DataLayout DL) { std::string("No indirect stubs manager builder for ") + TT.str(), inconvertibleErrorCode()); - return std::unique_ptr( - new LLLazyJIT(std::move(ES), std::move(TM), std::move(DL), + if (NumCompileThreads == 0) { + auto TM = JTMB.createTargetMachine(); + if (!TM) + return TM.takeError(); + return std::unique_ptr( + new LLLazyJIT(std::move(ES), std::move(*TM), std::move(DL), std::move(CCMgr), std::move(ISMBuilder))); + } + + return std::unique_ptr( + new LLLazyJIT(std::move(ES), std::move(JTMB), std::move(DL), + NumCompileThreads, std::move(CCMgr), std::move(ISMBuilder))); } Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) { @@ -135,5 +200,14 @@ LLLazyJIT::LLLazyJIT( CODLayer(*this->ES, TransformLayer, *this->CCMgr, std::move(ISMBuilder)) { } +LLLazyJIT::LLLazyJIT( + std::unique_ptr ES, JITTargetMachineBuilder JTMB, + DataLayout 
DL, unsigned NumCompileThreads, std::unique_ptr CCMgr, + std::function()> ISMBuilder) + : LLJIT(std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads), + CCMgr(std::move(CCMgr)), TransformLayer(*this->ES, CompileLayer), + CODLayer(*this->ES, TransformLayer, *this->CCMgr, std::move(ISMBuilder)) { +} + } // End namespace orc. } // End namespace llvm. diff --git a/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll b/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll new file mode 100644 index 000000000000..a53f23b100c3 --- /dev/null +++ b/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll @@ -0,0 +1,18 @@ +; RUN: lli -jit-kind=orc-lazy -compile-threads=5 -thread-entry hello %s | FileCheck %s +; +; CHECK: Hello + +@.str = private unnamed_addr constant [7 x i8] c"Hello\0A\00", align 1 + +define void @hello() { +entry: + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0)) + ret void +} + +declare i32 @printf(i8*, ...) + +define i32 @main(i32 %argc, i8** %argv) { +entry: + ret i32 0 +} diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index 2312d7703486..49d5db133262 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -97,6 +97,17 @@ namespace { "orc-lazy", "Orc-based lazy JIT."))); + cl::opt + LazyJITCompileThreads("compile-threads", + cl::desc("Choose the number of compile threads " + "(jit-kind=orc-lazy only)"), + cl::init(0)); + + cl::list + ThreadEntryPoints("thread-entry", + cl::desc("calls the given entry-point on a new thread " + "(jit-kind=orc-lazy only)")); + // The MCJIT supports building for a target address space separate from // the JIT compilation process. Use a forked process and a copying // memory manager with IPC to execute using this functionality. 
@@ -363,6 +374,19 @@ int main(int argc, char **argv, char * const *envp) { if (UseJITKind == JITKind::OrcLazy) return runOrcLazyJIT(argv[0]); + else { + // Make sure nobody used an orc-lazy specific option accidentally. + + if (LazyJITCompileThreads != 0) { + errs() << "-compile-threads requires -jit-kind=orc-lazy\n"; + exit(1); + } + + if (!ThreadEntryPoints.empty()) { + errs() << "-thread-entry requires -jit-kind=orc-lazy\n"; + exit(1); + } + } LLVMContext Context; @@ -745,11 +769,11 @@ int runOrcLazyJIT(const char *ProgName) { reportError(Err, ProgName); const auto &TT = MainModule.getModule()->getTargetTriple(); - orc::JITTargetMachineBuilder TMD = + orc::JITTargetMachineBuilder JTMB = TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost()) : orc::JITTargetMachineBuilder(Triple(TT)); - TMD.setArch(MArch) + JTMB.setArch(MArch) .setCPU(getCPUStr()) .addFeatures(getFeatureList()) .setRelocationModel(RelocModel.getNumOccurrences() @@ -758,9 +782,13 @@ int runOrcLazyJIT(const char *ProgName) { .setCodeModel(CMModel.getNumOccurrences() ? Optional(CMModel) : None); - auto TM = ExitOnErr(TMD.createTargetMachine()); - auto DL = TM->createDataLayout(); - auto J = ExitOnErr(orc::LLLazyJIT::Create(std::move(TM), DL)); + DataLayout DL(""); + { + // Create a throwaway TargetMachine to get the data layout. + auto TM = ExitOnErr(JTMB.createTargetMachine()); + DL = TM->createDataLayout(); + } + auto J = ExitOnErr(orc::LLLazyJIT::Create(std::move(JTMB), DL, LazyJITCompileThreads)); auto Dump = createDebugDumper(); @@ -807,6 +835,16 @@ int runOrcLazyJIT(const char *ProgName) { // Run any static constructors. ExitOnErr(J->runConstructors()); + // Run any -thread-entry points. 
+ std::vector AltEntryThreads; + for (auto &ThreadEntryPoint : ThreadEntryPoints) { + auto EntryPointSym = ExitOnErr(J->lookup(ThreadEntryPoint)); + typedef void (*EntryPointPtr)(); + auto EntryPoint = + reinterpret_cast(static_cast(EntryPointSym.getAddress())); + AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); })); + } + // Run main. auto MainSym = ExitOnErr(J->lookup("main")); typedef int (*MainFnPtr)(int, const char *[]); @@ -817,8 +855,12 @@ int runOrcLazyJIT(const char *ProgName) { reinterpret_cast(static_cast(MainSym.getAddress())); auto Result = Main(ArgV.size(), (const char **)ArgV.data()); - ExitOnErr(J->runDestructors()); + // Wait for -thread-entry threads. + for (auto &AltEntryThread : AltEntryThreads) + AltEntryThread.join(); + // Run destructors. + ExitOnErr(J->runDestructors()); CXXRuntimeOverrides.runDestructors(); return Result;