Remove use of llvm.codegen intrinsic for GPGPU codegen

We used the llvm.codegen intrinsic to generate code for embedded LLVM-IR
strings. The reason we introduced such an intrinsic is that previously the
clang/opt tools were NOT linked with the various LLVM targets and their
AsmParsers and AsmPrinters. Since clang/opt are now linked with all the
needed libraries, we no longer need the llvm.codegen intrinsic.

llvm-svn: 211573
This commit is contained in:
Yabin Hu 2014-06-24 08:11:36 +00:00
parent ce8245b5fd
commit cc91169fd7
6 changed files with 192 additions and 526 deletions

View File

@ -159,13 +159,6 @@ private:
/// will be copied from host to device.
Function *createSubfunctionDefinition(int NumArgs);
/// @brief Extract all the ptx related subfunctions into a new module.
///
/// @param M Current module.
/// @return The generated module containing only gpu related
/// subfunctions.
Module *extractPTXFunctionsFromModule(const Module *M);
/// @brief Get the Value of CUDA block width.
Value *getCUDABlockWidth();

View File

@ -22,10 +22,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@ -507,44 +509,98 @@ Value *PTXGenerator::getOutputArraySizeInBytes() {
return ConstantInt::get(getInt64Type(), OutputBytes);
}
/// @brief Collect the PTX functions of a module into a new module.
///
/// Every function of @p M that has a body and uses the PTX_Device or the
/// PTX_Kernel calling convention is cloned into a module named
/// "TempGPUModule" that lives in the same context as @p M. All other
/// functions are ignored.
///
/// @param M      The module whose functions are scanned.
/// @param Triple The target triple; it is normalized and set on the new
///               module.
/// @return The new module. It is allocated with new and handed back as a raw
///         pointer.
static Module *extractPTXFunctionsFromModule(const Module *M,
                                             const StringRef &Triple) {
  // Records, for each original function and argument, the clone standing in
  // for it in the new module.
  llvm::ValueToValueMapTy ValueMap;

  Module *GPUModule = new Module("TempGPUModule", M->getContext());
  GPUModule->setTargetTriple(Triple::normalize(Triple));

  for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE;
       ++FI) {
    // Only functions with a body are of interest.
    if (FI->isDeclaration())
      continue;

    // ... and of those only the ones using a PTX calling convention.
    if (FI->getCallingConv() != CallingConv::PTX_Device &&
        FI->getCallingConv() != CallingConv::PTX_Kernel)
      continue;

    // Create an empty function with the same type, linkage, name and
    // attributes in the new module.
    FunctionType *FTy = cast<FunctionType>(FI->getType()->getElementType());
    Function *Clone =
        Function::Create(FTy, FI->getLinkage(), FI->getName(), GPUModule);
    Clone->copyAttributesFrom(FI);
    ValueMap[FI] = Clone;

    // Name the arguments of the clone after the original ones and map each
    // original argument to its counterpart.
    Function::arg_iterator CloneArg = Clone->arg_begin();
    for (Function::const_arg_iterator SrcArg = FI->arg_begin(),
                                      SrcEnd = FI->arg_end();
         SrcArg != SrcEnd; ++SrcArg, ++CloneArg) {
      CloneArg->setName(SrcArg->getName());
      ValueMap[SrcArg] = CloneArg;
    }

    // The cloned return instructions are collected but not used.
    SmallVector<ReturnInst *, 8> ClonedReturns;
    CloneFunctionInto(Clone, FI, ValueMap, /*ModuleLevelChanges=*/true,
                      ClonedReturns);
  }

  return GPUModule;
}
/// @brief Run a target backend over a module and collect the emitted assembly
///        in a string.
///
/// @param New      The module to generate code for.
/// @param Triple   The target triple; it is normalized before the target is
///                 looked up.
/// @param MCPU     The target CPU name passed to the target machine.
/// @param Features The target feature string passed to the target machine.
/// @param ASM      Output parameter; the assembly text is written through a
///                 raw_string_ostream attached to this string.
/// @return true on success. false if no target is registered for the triple,
///         if no target machine could be created, or if the target cannot
///         emit assembly files; a message is printed to errs() in each case.
static bool createASMAsString(Module *New, const StringRef &Triple,
                              const StringRef &MCPU, const StringRef &Features,
                              std::string &ASM) {
  llvm::Triple TheTriple(Triple::normalize(Triple));
  std::string ErrMsg;
  const Target *TheTarget =
      TargetRegistry::lookupTarget(TheTriple.getTriple(), ErrMsg);
  if (!TheTarget) {
    errs() << ErrMsg << "\n";
    return false;
  }

  TargetOptions Options;
  std::unique_ptr<TargetMachine> target(TheTarget->createTargetMachine(
      TheTriple.getTriple(), MCPU, Features, Options));
  // Check at run time instead of asserting: an assert is compiled out in
  // release builds and the null pointer would be dereferenced right below.
  if (!target) {
    errs() << "Could not allocate target machine!\n";
    return false;
  }
  TargetMachine &Target = *target;

  // Build up all of the passes that we want to do to the module.
  PassManager PM;
  TargetLibraryInfo *TLI = new TargetLibraryInfo(TheTriple);
  PM.add(TLI);
  PM.add(new DataLayoutPass(*Target.getDataLayout()));
  Target.addAnalysisPasses(PM);

  {
    // The streams live in their own scope so that they are destroyed before
    // the function returns on the success path.
    raw_string_ostream NameROS(ASM);
    formatted_raw_ostream FOS(NameROS);

    // Ask the target to add backend passes as necessary.
    // NOTE(review): the flag taken by addPassesToEmitFile at this position
    // may be a "disable verifier" switch in this LLVM version, in which case
    // the variable name is inverted - confirm against TargetMachine.h.
    const bool UseVerifier = true;
    if (Target.addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
                                   UseVerifier)) {
      errs() << "The target does not support generation of this file type!\n";
      return false;
    }

    PM.run(*New);
    FOS.flush();
  }

  return true;
}
/// @brief Create the kernel string for the GPU code of the current module.
///
/// NOTE(review): this span is shown without diff markers and appears to
/// interleave the removed llvm.codegen based body with its replacement: the
/// for loop below is never closed and there are two return sequences. Confirm
/// against the committed file before relying on the statement order here.
///
/// @param SubFunction The function that is printed into the kernel string.
/// @return On the createASMAsString() path, a global string pointer named
///         "llvm_kernel", or NULL if createASMAsString() reports a failure.
Value *PTXGenerator::createPTXKernelFunction(Function *SubFunction) {
Module *M = getModule();
// Collect the PTX functions of M in a separate, newly allocated module.
Module *GPUModule = extractPTXFunctionsFromModule(M, GPUTriple);
std::string LLVMKernelStr;
raw_string_ostream NameROS(LLVMKernelStr);
formatted_raw_ostream FOS(NameROS);
FOS << "target triple = \"" << GPUTriple << "\"\n";
SubFunction->print(FOS);
// Insert ptx intrinsics into the kernel string.
for (Module::iterator I = M->begin(), E = M->end(); I != E;) {
Function *F = I++;
// Function must be a prototype and unused.
if (F->isDeclaration() && F->isIntrinsic()) {
switch (F->getIntrinsicID()) {
// Only the declarations of these ptx read intrinsics are printed.
case Intrinsic::ptx_read_nctaid_x:
case Intrinsic::ptx_read_nctaid_y:
case Intrinsic::ptx_read_ctaid_x:
case Intrinsic::ptx_read_ctaid_y:
case Intrinsic::ptx_read_ntid_x:
case Intrinsic::ptx_read_ntid_y:
case Intrinsic::ptx_read_tid_x:
case Intrinsic::ptx_read_tid_y:
F->print(FOS);
break;
default:
break;
}
}
// Generate assembly for GPUModule into LLVMKernelStr, using "sm_20" as the
// CPU name and an empty feature string.
if (!createASMAsString(GPUModule, GPUTriple, "sm_20" /*MCPU*/,
"" /*Features*/, LLVMKernelStr)) {
errs() << "Generate ptx string failed!\n";
// NOTE(review): GPUModule is not deleted on this path; it is only deleted
// after the string has been created below - confirm whether this is
// intended.
return NULL;
}
Value *LLVMKernel =
Builder.CreateGlobalStringPtr(LLVMKernelStr, "llvm_kernel");
Value *MCPU = Builder.CreateGlobalStringPtr("sm_10", "mcpu");
Value *Features = Builder.CreateGlobalStringPtr("", "cpu_features");
Function *GetDeviceKernel = Intrinsic::getDeclaration(M, Intrinsic::codegen);
return Builder.CreateCall3(GetDeviceKernel, LLVMKernel, MCPU, Features);
delete GPUModule;
return LLVMKernel;
}
Value *PTXGenerator::getPTXKernelEntryName(Function *SubFunction) {

View File

@ -0,0 +1,72 @@
; REQUIRES: nvptx-registered-target
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed+gpu -enable-polly-gpgpu -polly-gpgpu-triple=nvptx64-unknown-unknown -polly-codegen < %s -S | FileCheck %s
;int A[1024];
;int gpu() {
; int i;
;
; for(i = 0; i < 1024; i++)
; A[i] = i*128 + 508;
;
; return 0;
;}
;
;int main() {
; int b = gpu();
; return 0;
;}
; ModuleID = '1d_parallel.s'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x i32] zeroinitializer, align 16
; Function Attrs: nounwind uwtable
; Writes i*128 + 508 to A[i] for every i from 0 to 1023 and returns 0. This is
; the loop of the C function gpu() quoted at the top of this file.
define i32 @gpu() #0 {
br label %.split
.split: ; preds = %0
br label %1
; Loop body: %indvar is the induction variable i, starting at 0.
; <label>:1 ; preds = %.split, %1
%indvar = phi i64 [ 0, %.split ], [ %indvar.next, %1 ]
; Compute i*128 + 508 in 64 bit and truncate it to the i32 element type.
%2 = mul i64 %indvar, 128
%3 = add i64 %2, 508
%4 = trunc i64 %3 to i32
; Store the value to A[i].
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar
store i32 %4, i32* %scevgep, align 4
; Advance i and continue while the incremented value differs from 1024.
%indvar.next = add i64 %indvar, 1
%exitcond = icmp ne i64 %indvar.next, 1024
br i1 %exitcond, label %1, label %5
; <label>:5 ; preds = %1
ret i32 0
}
; Function Attrs: nounwind uwtable
; Calls @gpu once and returns 0; the value returned by @gpu is not used.
define i32 @main() #0 {
br label %.split
.split: ; preds = %0
%1 = tail call i32 @gpu()
ret i32 0
}
; CHECK: call void @polly_initDevice
; CHECK: call void @polly_getPTXModule
; CHECK: call void @polly_getPTXKernelEntry
; CHECK: call void @polly_allocateMemoryForHostAndDevice
; CHECK: call void @polly_setKernelParameters
; CHECK: call void @polly_startTimerByCudaEvent
; CHECK: call void @polly_launchKernel
; CHECK: call void @polly_copyFromDeviceToHost
; CHECK: call void @polly_stopTimerByCudaEvent
; CHECK: call void @polly_cleanupGPGPUResources
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.5.0 "}

View File

@ -0,0 +1,17 @@
{
"context" : "{ : }",
"name" : "%1 => %5",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_1[i0] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_1[i0] : i0 >= 0 and i0 <= 1023 }",
"name" : "Stmt_1",
"schedule" : "{ Stmt_1[i0] -> scattering[0, i0, 0] }"
}
]
}

View File

@ -0,0 +1,17 @@
{
"context" : "{ : }",
"name" : "%1 => %5",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_1[i0] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_1[i0] : i0 >= 0 and i0 <= 1023 }",
"name" : "Stmt_1",
"schedule" : "{ Stmt_1[i0] -> scattering[0, o0, o1, o2, o3] : o0 >= 0 and o0 <= 1 and o1 >= 0 and o1 <= 1 and o2 >= 0 and o2 <= 15 and o3 >= 0 and o3 <= 15 and i0 = 512o0 + 256o1 + 16o2 + o3 }"
}
]
}

View File

@ -1,489 +0,0 @@
From 7e36390f24f6ceaea7bc2ba4adcd55d06cf73439 Mon Sep 17 00:00:00 2001
From: Yabin Hu <yabin.hwu@gmail.com>
Date: Thu, 29 Nov 2012 16:08:29 +0800
Subject: [PATCH] Add llvm.codegen intrinsic.
The llvm.codegen intrinsic generates code for embedded LLVM-IR
strings. Each call to the intrinsic is replaced by a pointer to
the newly generated target code. The code generation target can be
different to the one of the parent module.
---
docs/LangRef.html | 36 +++
include/llvm/CodeGen/Passes.h | 3 +
include/llvm/InitializePasses.h | 1 +
include/llvm/Intrinsics.td | 4 +
lib/CodeGen/CMakeLists.txt | 1 +
lib/CodeGen/CodeGen.cpp | 1 +
lib/CodeGen/CodeGenIntrinsic.cpp | 227 ++++++++++++++++++++
lib/CodeGen/Passes.cpp | 3 +
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +
lib/Target/LLVMBuild.txt | 2 +-
lib/VMCore/Verifier.cpp | 10 +
.../CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll | 28 +++
test/CodeGen/X86/EmbeddedCG/lit.local.cfg | 5 +
13 files changed, 322 insertions(+), 1 deletions(-)
create mode 100644 lib/CodeGen/CodeGenIntrinsic.cpp
create mode 100644 test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
create mode 100644 test/CodeGen/X86/EmbeddedCG/lit.local.cfg
diff --git a/docs/LangRef.html b/docs/LangRef.html
index cfc1c7d..eae069e 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -243,6 +243,7 @@
<li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
<li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
<li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
+ <li><a href="#int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_libc">Standard C Library Intrinsics</a>
@@ -7249,6 +7250,41 @@ LLVM</a>.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_codegen_intrinsic">'<tt>llvm.codegen</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+ declare i8* @llvm.codegen(i8* &lt;IRString&gt;, i8* &lt;MCPU&gt;, i8* &lt;
+ Features&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.codegen</tt>' intrinsic uses the LLVM back ends to generate
+ code for embedded LLVM-IR strings. The code generation target can be
+ different to the one of the parent module.</p>
+
+<h5>Arguments:</h5>
+<p><tt>IRString</tt> is a string containing LLVM-IR.</p>
+<p><tt>MCPU</tt> is the name of the target CPU.</p>
+<p><tt>Features</tt> is the string representation of the additional target
+ features.</p>
+
+<h5>Semantics:</h5>
+<p>The '<tt>llvm.codegen</tt>' intrinsic transforms a string containing LLVM IR
+ to target assembly code. Calls to the intrinsic are replaced by a pointer to
+ the newly generated target code. In case LLVM cannot generate code (e.g. the
+ target is not available), the call to the intrinsic is replaced by an i8 NULL
+ pointer. Users of this intrinsic should make sure the target triple is
+ properly set in the &lt;IRString&gt;. Inputs to both &lt;MCPU&gt; and
+ &lt;Features&gt; parameters can be null pointers.</p>
+
+</div>
+
</div>
<!-- ======================================================================= -->
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 44c9676..57b3aa2 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -432,6 +432,9 @@ namespace llvm {
/// branch folding).
extern char &GCMachineCodeAnalysisID;
+ /// CodeGenIntrinsic Pass - Create target code for embedded LLVM-IR strings.
+ FunctionPass *createCodeGenIntrinsicPass();
+
/// Deleter Pass - Releases GC metadata.
///
FunctionPass *createGCInfoDeleter();
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index e06b892..fe8655e 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -93,6 +93,7 @@ void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
void initializeDCEPass(PassRegistry&);
+void initializeCodeGenIntrinsicPass(PassRegistry&);
void initializeDSEPass(PassRegistry&);
void initializeDeadInstEliminationPass(PassRegistry&);
void initializeDeadMachineInstructionElimPass(PassRegistry&);
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index d3a548c..a60d2bb 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -238,6 +238,10 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
// guard to the correct place on the stack frame.
def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
+//===----------------- Code Generation for Embedded LLVM-IR ---------------===//
+def int_codegen : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty]>;
+
//===------------------- Standard C Library Intrinsics --------------------===//
//
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 7a20ff6..8e1ab9a 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen
CalcSpillWeights.cpp
CallingConvLower.cpp
CodeGen.cpp
+ CodeGenIntrinsic.cpp
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a53f6f8..702ee18 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
+ initializeCodeGenIntrinsicPass(Registry);
initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeEarlyIfConverterPass(Registry);
diff --git a/lib/CodeGen/CodeGenIntrinsic.cpp b/lib/CodeGen/CodeGenIntrinsic.cpp
new file mode 100644
index 0000000..cf8aa54
--- /dev/null
+++ b/lib/CodeGen/CodeGenIntrinsic.cpp
@@ -0,0 +1,227 @@
+//===-- CodeGenIntrinsic.cpp - CodeGen Intrinsic --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm.codegen intrinsic.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+namespace {
+ /// ASMGenerator generates target-specific assembly code from LLVM IR.
+ class ASMGenerator {
+ public:
+ ASMGenerator() {}
+
+ /// generate - Generates a target code string from a LLVM IR Value.
+ bool generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
+ std::string &ASM);
+
+ private:
+ bool getStringFromConstantExpr(Value *ConstData, std::string &Out) const;
+ };
+
+ /// CodeGenIntrinsic - This pass replaces each call to the llvm.codegen
+ /// intrinsic with a string generated by ASMGenerator.
+ class CodeGenIntrinsic : public FunctionPass {
+ public:
+ static char ID;
+
+ CodeGenIntrinsic();
+ const char *getPassName() const;
+ virtual bool runOnFunction(Function &F);
+ };
+}
+
+// -----------------------------------------------------------------------------
+static bool getTargetMachineFromModule(Module *M, const StringRef &TripleStr,
+ const StringRef &MCPU,
+ const StringRef &Features,
+ TargetMachine *&TM) {
+ std::string ErrMsg;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
+ if (!TheTarget) {
+ errs() << ErrMsg << "\n";
+ return false;
+ }
+
+ TargetOptions Options;
+ TM = TheTarget->createTargetMachine(TripleStr, MCPU, Features, Options);
+ assert(TM && "Could not allocate target machine!");
+ return true;
+}
+
+static bool createASMAsString(Module *New, const StringRef &Triple,
+ const StringRef &MCPU, const StringRef &Features,
+ std::string &ASM) {
+ TargetMachine *Target;
+ if (!getTargetMachineFromModule(New, Triple, MCPU, Features, Target)) {
+ return false;
+ }
+
+ // Build up all of the passes that we want to do to the module.
+ PassManager PM;
+
+ // Get the data layout of the new module. If it is empty, return false.
+ const std::string &ModuleDataLayout = New->getDataLayout();
+ if (ModuleDataLayout.empty())
+ return false;
+
+ {
+ raw_string_ostream NameROS(ASM);
+ formatted_raw_ostream FOS(NameROS);
+
+ // Ask the target to add backend passes as necessary.
+ int UseVerifier = true;
+ if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
+ UseVerifier)) {
+ errs() << "CodeGen Intrinsic: target does not support generation of this "
+ << "file type!\n";
+
+ return false;
+ }
+
+ PM.run(*New);
+ FOS.flush();
+ }
+
+ delete Target;
+ return true;
+}
+
+bool ASMGenerator::getStringFromConstantExpr(Value *ConstData,
+ std::string &Out) const {
+ bool Result = false;
+ if (ConstantExpr *U = dyn_cast<ConstantExpr>(ConstData)) {
+ Value *R = U->getOperand(0);
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(R)) {
+ Constant *C = GV->getInitializer();
+ if (ConstantDataArray *CA = dyn_cast<ConstantDataArray>(C)) {
+ Out = CA->getAsString();
+ Result = true;
+ }
+ }
+ }
+ return Result;
+}
+
+bool ASMGenerator::generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr,
+ std::string &ASM) {
+ std::string Kernel;
+ if (!getStringFromConstantExpr(IRStr, Kernel))
+ return false;
+
+ std::string MCPU;
+ if (!getStringFromConstantExpr(MCPUStr, MCPU))
+ MCPU = "";
+
+ std::string Features;
+ if (!getStringFromConstantExpr(FeaturesStr, Features))
+ Features = "";
+
+ SMDiagnostic ErrorMessage;
+ LLVMContext Context;
+ std::auto_ptr<Module> TempModule(
+ ParseAssemblyString(Kernel.c_str(), 0, ErrorMessage, Context));
+
+ Triple TheTriple(TempModule->getTargetTriple());
+ const std::string TripleStr = TheTriple.getTriple();
+ if(TripleStr.empty()) {
+ errs() << "error: Target triple isn't set correctly for the new module.\n";
+ return false;
+ }
+
+ return createASMAsString(TempModule.get(), TripleStr.data(), MCPU.data(),
+ Features.data(), ASM);
+}
+
+// -----------------------------------------------------------------------------
+INITIALIZE_PASS(CodeGenIntrinsic, "codegen-intrinsic", "CodeGen Intrinsic",
+ false, false)
+
+FunctionPass *llvm::createCodeGenIntrinsicPass() {
+ return new CodeGenIntrinsic();
+}
+
+char CodeGenIntrinsic::ID = 0;
+
+CodeGenIntrinsic::CodeGenIntrinsic()
+ : FunctionPass(ID) {
+}
+
+const char *CodeGenIntrinsic::getPassName() const {
+ return "Lowering CodeGen Intrinsic.";
+}
+
+bool CodeGenIntrinsic::runOnFunction(Function &F) {
+ bool MadeChange = false;
+ Module *M = F.getParent();
+ if (Function *CG = M->getFunction("llvm.codegen")) {
+ for (Function::use_iterator I = CG->use_begin(), E = CG->use_end();
+ I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(*I)) {
+ if (&F != CI->getParent()->getParent())
+ continue;
+
+ std::string ASM;
+ ASMGenerator *Generator = new ASMGenerator();
+ IRBuilder<> Builder(CI->getParent(), CI);
+ Value *St;
+ if (!Generator->generate(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), ASM)) {
+ Type *Ty= CG->getReturnType();
+ St = Constant::getNullValue(Ty);
+ } else {
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims?
+ // Seems that, short of multithreaded LLVM, it should be safe;
+ // all that is necessary is that a simple Module::iterator loop
+ // not be invalidated. Appending to the GlobalVariable list is
+ // safe in that sense.
+ //
+ // All the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this
+ // transformation needs to be a ModulePass (which means it
+ // cannot be in the 'llc' pipeline (which uses a
+ // FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ St = Builder.CreateGlobalStringPtr(ASM, "ASM");
+ }
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ // We should erase the unused globals from current module. But we
+ // can't do this within a FunctionPass.
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 526d994..1de0c63 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -369,6 +369,9 @@ void TargetPassConfig::addIRPasses() {
addPass(createGCLoweringPass());
+ // Generate target code for embedded LLVM-IR strings.
+ addPass(createCodeGenIntrinsicPass());
+
// Make sure that no unreachable blocks are instruction selected.
addPass(createUnreachableBlockEliminationPass());
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 56e774c..97006c0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5169,6 +5169,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::donothing:
// ignore
return 0;
+ case Intrinsic::codegen:
+ llvm_unreachable("failed to lower codegen intrinsic!");
}
}
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index eb6c779..a54f57c 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -45,7 +45,7 @@ parent = Libraries
type = Library
name = Target
parent = Libraries
-required_libraries = Core MC Support
+required_libraries = Core MC Support AsmParser
; This is a special group whose required libraries are extended (by llvm-build)
; with every built target, which makes it easy for tools to include every
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 3782957..896772a 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1952,6 +1952,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
"llvm.invariant.end parameter #2 must be a constant integer", &CI);
break;
+ case Intrinsic::codegen:
+ Assert1(isa<ConstantExpr>(CI.getArgOperand(0)),
+ "llvm.codegen parameter #1 must be a constant expression", &CI);
+ Assert1(isa<ConstantExpr>(CI.getArgOperand(1)) ||
+ isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.codegen parameter #2 must be a constant expression", &CI);
+ Assert1(isa<ConstantExpr>(CI.getArgOperand(2)) ||
+ isa<ConstantPointerNull>(CI.getArgOperand(2)),
+ "llvm.codegen parameter #3 must be a constant expression", &CI);
+ break;
}
}
diff --git a/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
new file mode 100644
index 0000000..73d34e1
--- /dev/null
+++ b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; ModuleID = 'embedded-codegen-ptx.ll'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+@llvm_kernel = private unnamed_addr constant [1940 x i8] c"target triple = \22nvptx-unknown-unknown\22\0A\0Adefine internal ptx_kernel void @gpu_codegen.ptx_subfn(i8* %ptx.Array) {\0Aptx.setup:\0A %0 = bitcast i8* %ptx.Array to [128 x [128 x i32]]*\0A %1 = call i32 @llvm.ptx.read.nctaid.x()\0A %2 = zext i32 %1 to i64\0A %3 = call i32 @llvm.ptx.read.nctaid.y()\0A %4 = zext i32 %3 to i64\0A %5 = call i32 @llvm.ptx.read.ntid.x()\0A %6 = zext i32 %5 to i64\0A %7 = call i32 @llvm.ptx.read.ntid.y()\0A %8 = zext i32 %7 to i64\0A %9 = call i32 @llvm.ptx.read.ctaid.x()\0A %10 = zext i32 %9 to i64\0A %11 = call i32 @llvm.ptx.read.ctaid.y()\0A %12 = zext i32 %11 to i64\0A %13 = call i32 @llvm.ptx.read.tid.x()\0A %14 = zext i32 %13 to i64\0A %15 = call i32 @llvm.ptx.read.tid.y()\0A %16 = zext i32 %15 to i64\0A br label %ptx.loop_body\0A\0Aptx.exit: ; preds = %polly.stmt.for.body3\0A ret void\0A\0Aptx.loop_body: ; preds = %ptx.setup\0A %p_gpu_index_i = mul i64 %12, %2\0A %17 = add i64 %p_gpu_index_i, %10\0A %p_gpu_index_j = mul i64 %16, %6\0A %18 = add i64 %p_gpu_index_j, %14\0A br label %polly.stmt.for.body3\0A\0Apolly.stmt.for.body3: ; preds = %ptx.loop_body\0A %19 = trunc i64 %17 to i32\0A %p_mul = shl nsw i32 %19, 7\0A %20 = trunc i64 %18 to i32\0A %p_add = add nsw i32 %p_mul, %20\0A %21 = trunc i64 %17 to i32\0A %22 = trunc i64 %18 to i32\0A %p_arrayidx4 = getelementptr inbounds [128 x [128 x i32]]* %0, i32 0, i32 %21, i32 %22\0A store i32 %p_add, i32* %p_arrayidx4\0A br label %ptx.exit\0A}\0A\0Adeclare i32 @llvm.ptx.read.nctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.nctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.y() nounwind readnone\0A\00"
+
+@.str = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+define i32 @gpu_codegen() nounwind {
+entry:
+ %0 = call i8* @llvm.codegen(i8* getelementptr inbounds ([1940 x i8]* @llvm_kernel, i32 0, i32 0), i8* null, i8* null)
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0)
+ ret i32 0
+}
+
+define i32 @main() nounwind {
+entry:
+ %call = call i32 @gpu_codegen()
+ ret i32 0
+}
+
+declare i8* @llvm.codegen(i8*, i8*, i8*) nounwind
+
+declare i32 @printf(i8*, ...) nounwind
+
+; CHECK: gpu_codegen_2E_ptx_subfn
diff --git a/test/CodeGen/X86/EmbeddedCG/lit.local.cfg b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
new file mode 100644
index 0000000..7180c84
--- /dev/null
+++ b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'NVPTX' in targets:
+ config.unsupported = True
--
1.7.6.5