From 8bbea9cde7494b13da3f154592116a702dcb3677 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Sat, 4 May 2013 17:01:55 +0000 Subject: [PATCH] [XCore] Move lowering of thread local storage to a separate pass. Thread local storage is not supported by the XMOS linker so we handle thread local variables by lowering the variable to an array of n elements (where n is the number of hardware threads per core, currently 8 for all XMOS devices) indexed by the current thread ID. Previously this lowering was spread across the XCoreISelLowering and the XCoreAsmPrinter classes. Moving this to a separate pass should be much cleaner. llvm-svn: 181124 --- llvm/lib/Target/XCore/CMakeLists.txt | 1 + llvm/lib/Target/XCore/XCore.h | 4 + llvm/lib/Target/XCore/XCoreAsmPrinter.cpp | 15 +- llvm/lib/Target/XCore/XCoreISelLowering.cpp | 42 ----- .../Target/XCore/XCoreLowerThreadLocal.cpp | 145 ++++++++++++++++++ llvm/lib/Target/XCore/XCoreTargetMachine.cpp | 6 + 6 files changed, 158 insertions(+), 55 deletions(-) create mode 100644 llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp diff --git a/llvm/lib/Target/XCore/CMakeLists.txt b/llvm/lib/Target/XCore/CMakeLists.txt index 099ad390d2a7..d5bfddc23e0f 100644 --- a/llvm/lib/Target/XCore/CMakeLists.txt +++ b/llvm/lib/Target/XCore/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_target(XCoreCodeGen XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp XCoreISelLowering.cpp + XCoreLowerThreadLocal.cpp XCoreMachineFunctionInfo.cpp XCoreMCInstLower.cpp XCoreRegisterInfo.cpp diff --git a/llvm/lib/Target/XCore/XCore.h b/llvm/lib/Target/XCore/XCore.h index 08f091e5b870..2f375fc952ca 100644 --- a/llvm/lib/Target/XCore/XCore.h +++ b/llvm/lib/Target/XCore/XCore.h @@ -20,12 +20,16 @@ namespace llvm { class FunctionPass; + class ModulePass; class TargetMachine; class XCoreTargetMachine; class formatted_raw_ostream; + void initializeXCoreLowerThreadLocalPass(PassRegistry &p); + FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel); + 
ModulePass *createXCoreLowerThreadLocalPass(); } // end namespace llvm; diff --git a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 0d146ba4d98d..e177ad300cfe 100644 --- a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -36,7 +36,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -46,12 +45,6 @@ #include using namespace llvm; -static cl::opt MaxThreads("xcore-max-threads", cl::Optional, - cl::desc("Maximum number of threads (for emulation thread-local storage)"), - cl::Hidden, - cl::value_desc("number"), - cl::init(8)); - namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; @@ -152,10 +145,10 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitAlignment(Align > 2 ? Align : 2, GV); - unsigned Size = TD->getTypeAllocSize(C->getType()); if (GV->isThreadLocal()) { - Size *= MaxThreads; + report_fatal_error("TLS is not supported by this target!"); } + unsigned Size = TD->getTypeAllocSize(C->getType()); if (MAI->hasDotTypeDotSizeDirective()) { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," + @@ -164,10 +157,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitLabel(GVSym); EmitGlobalConstant(C); - if (GV->isThreadLocal()) { - for (unsigned i = 1; i < MaxThreads; ++i) - EmitGlobalConstant(C); - } // The ABI requires that unsigned scalar types smaller than 32 bits // are padded to 32 bits. 
if (Size < 4) diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp index f0346f6715c5..28472ea339b9 100644 --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -120,9 +120,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); - // Thread Local Storage - setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -172,7 +169,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); @@ -255,44 +251,6 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { DAG.getConstant(Intrinsic::xcore_getid, MVT::i32)); } -static inline bool isZeroLengthArray(Type *Ty) { - ArrayType *AT = dyn_cast_or_null(Ty); - return AT && (AT->getNumElements() == 0); -} - -SDValue XCoreTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const -{ - // FIXME there isn't really debug info here - DebugLoc dl = Op.getDebugLoc(); - // transform to label + getid() * size - const GlobalValue *GV = cast(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias then use the aliasee to determine size - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); - } - if (!GVar) { - llvm_unreachable("Thread local object not a GlobalVariable?"); - } - Type *Ty = 
cast(GV->getType())->getElementType(); - if (!Ty->isSized() || isZeroLengthArray(Ty)) { -#ifndef NDEBUG - errs() << "Size of thread local object " << GVar->getName() - << " is unknown\n"; -#endif - llvm_unreachable(0); - } - SDValue base = getGlobalAddressWrapper(GA, GV, DAG); - const DataLayout *TD = TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(Ty); - SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl), - DAG.getConstant(Size, MVT::i32)); - return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset); -} - SDValue XCoreTargetLowering:: LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp new file mode 100644 index 000000000000..2e328b4e3441 --- /dev/null +++ b/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -0,0 +1,145 @@ +//===-- XCoreLowerThreadLocal - Lower thread local variables --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains a pass that lowers thread local variables on the +/// XCore. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+#define DEBUG_TYPE "xcore-lower-thread-local"
+
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads(
+  "xcore-max-threads", cl::Optional,
+  cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+  cl::Hidden, cl::value_desc("number"), cl::init(8));
+
+namespace {
+  /// Lowers thread local variables on the XCore. Each thread local variable is
+  /// expanded to an array of n elements indexed by the thread ID where n is the
+  /// fixed number of hardware threads supported by the device.
+  struct XCoreLowerThreadLocal : public ModulePass {
+    static char ID;
+
+    XCoreLowerThreadLocal() : ModulePass(ID) {
+      initializeXCoreLowerThreadLocalPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool lowerGlobal(GlobalVariable *GV);
+
+    bool runOnModule(Module &M);
+  };
+}
+
+char XCoreLowerThreadLocal::ID = 0;
+
+INITIALIZE_PASS(XCoreLowerThreadLocal, "xcore-lower-thread-local",
+                "Lower thread local variables", false, false)
+
+ModulePass *llvm::createXCoreLowerThreadLocalPass() {
+  return new XCoreLowerThreadLocal();
+}
+
+/// Returns the type [MaxThreads x OriginalType] that replaces a TLS variable.
+static ArrayType *createLoweredType(Type *OriginalType) {
+  return ArrayType::get(OriginalType, MaxThreads);
+}
+
+/// Builds the initializer of the lowered array: MaxThreads copies of
+/// \p OriginalInitializer, one for each element of \p NewType.
+static Constant *
+createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
+  SmallVector<Constant *, 8> Elements(MaxThreads, OriginalInitializer);
+  return ConstantArray::get(NewType, Elements);
+}
+
+/// Returns true if any user of \p GV is not an Instruction.
+static bool hasNonInstructionUse(GlobalVariable *GV) {
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
+       ++UI)
+    if (!isa<Instruction>(*UI))
+      return true;
+  return false;
+}
+
+/// Returns true if \p Ty is an array type with zero elements.
+static bool isZeroLengthArray(Type *Ty) {
+  ArrayType *AT = dyn_cast<ArrayType>(Ty);
+  return AT && (AT->getNumElements() == 0);
+}
+
+/// Replaces thread local \p GV with a non thread local array of MaxThreads
+/// copies, indexed at each use by xcore_getid. Returns true if \p GV was
+/// lowered, false if it was left untouched.
+bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
+  Module *M = GV->getParent();
+  LLVMContext &Ctx = M->getContext();
+  if (!GV->isThreadLocal())
+    return false;
+
+  // Skip globals that we can't lower and leave it for the backend to error.
+  // GV's own type is a pointer, so the size checks must look at the pointee.
+  Type *Ty = GV->getType()->getElementType();
+  if (hasNonInstructionUse(GV) || !Ty->isSized() || isZeroLengthArray(Ty))
+    return false;
+
+  // Create replacement global. A declaration has no initializer to replicate.
+  ArrayType *NewType = createLoweredType(Ty);
+  Constant *NewInitializer = 0;
+  if (GV->hasInitializer())
+    NewInitializer = createLoweredInitializer(NewType, GV->getInitializer());
+  GlobalVariable *NewGV =
+    new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
+                       NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
+                       GV->getType()->getAddressSpace(),
+                       GV->isExternallyInitialized());
+
+  // Update uses: each user addresses the element for the current thread ID.
+  Function *GetID = Intrinsic::getDeclaration(M, Intrinsic::xcore_getid);
+  Value *Zero = Constant::getNullValue(Type::getInt64Ty(Ctx));
+  SmallVector<User *, 16> Users(GV->use_begin(), GV->use_end());
+  for (unsigned I = 0, E = Users.size(); I != E; ++I) {
+    IRBuilder<> Builder(cast<Instruction>(Users[I]));
+    Value *Indices[] = { Zero, Builder.CreateCall(GetID) };
+    Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices);
+    Users[I]->replaceUsesOfWith(GV, Addr);
+  }
+
+  // Remove old global.
+  NewGV->takeName(GV);
+  GV->eraseFromParent();
+  return true;
+}
+
+/// Lowers every thread local global in \p M. Returns true on any change.
+bool XCoreLowerThreadLocal::runOnModule(Module &M) {
+  // Collect the thread local globals first, since lowerGlobal erases them.
+  bool MadeChange = false;
+  SmallVector<GlobalVariable *, 16> ThreadLocalGlobals;
+  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+       GVI != E; ++GVI) {
+    if (GVI->isThreadLocal())
+      ThreadLocalGlobals.push_back(&*GVI);
+  }
+  for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I)
+    MadeChange |= lowerGlobal(ThreadLocalGlobals[I]);
+  return MadeChange;
+}
diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 28c3d12c05fe..07e5fff1413b 100644
--- a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -46,6 +46,7 @@ public:
     return getTM<XCoreTargetMachine>();
   }
 
+  virtual bool addPreISel();
   virtual bool addInstSelector();
 };
 } // namespace
@@ -54,6 +55,11 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new XCorePassConfig(this, PM);
 }
 
+bool XCorePassConfig::addPreISel() {
+  addPass(createXCoreLowerThreadLocalPass());
+  return false;
+}
+
 bool XCorePassConfig::addInstSelector() {
   addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
   return false;