GPGPU: generate control flow within the kernel
llvm-svn: 275956
This commit is contained in:
parent
c84a1995fe
commit
59ab070523
|
@@ -192,6 +192,8 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
|
|||
|
||||
createKernelFunction(Kernel);
|
||||
|
||||
create(isl_ast_node_copy(Kernel->tree));
|
||||
|
||||
Builder.SetInsertPoint(&HostInsertPoint);
|
||||
IDToValue = HostIDs;
|
||||
|
||||
|
@@ -304,8 +306,12 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel) {
|
|||
|
||||
Function *FN = createKernelFunctionDecl(Kernel);
|
||||
|
||||
BasicBlock *PrevBlock = Builder.GetInsertBlock();
|
||||
auto EntryBlock = BasicBlock::Create(Builder.getContext(), "entry", FN);
|
||||
|
||||
DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
DT.addNewBlock(EntryBlock, PrevBlock);
|
||||
|
||||
Builder.SetInsertPoint(EntryBlock);
|
||||
Builder.CreateRetVoid();
|
||||
Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());
|
||||
|
|
|
@@ -93,7 +93,7 @@
|
|||
; IR: polly.exiting:
|
||||
; IR-NEXT: br label %polly.merge_new_and_old
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
|
||||
; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
|
||||
; KERNEL-IR-NEXT: entry:
|
||||
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
@@ -103,7 +103,19 @@
|
|||
; KERNEL-IR-NEXT: %t0 = zext i32 %2 to i64
|
||||
; KERNEL-IR-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
|
||||
; KERNEL-IR-NEXT: %t1 = zext i32 %3 to i64
|
||||
; KERNEL-IR-NEXT: br label %polly.loop_preheader
|
||||
|
||||
; KERNEL-IR-LABEL: polly.loop_exit:
|
||||
; KERNEL-IR-NEXT: ret void
|
||||
|
||||
; KERNEL-IR-LABEL: polly.loop_header:
|
||||
; KERNEL-IR-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.loop_header ]
|
||||
; KERNEL-IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
|
||||
; KERNEL-IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 0
|
||||
; KERNEL-IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
|
||||
|
||||
; KERNEL-IR-LABEL: polly.loop_preheader:
|
||||
; KERNEL-IR-NEXT: br label %polly.loop_header
|
||||
; KERNEL-IR-NEXT: }
|
||||
|
||||
; void double_parallel_loop(float A[][1024]) {
|
||||
|
|
|
@@ -34,13 +34,28 @@
|
|||
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98
|
||||
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) {
|
||||
; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %c0) {
|
||||
; KERNEL-IR-NEXT: entry:
|
||||
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
|
||||
; KERNEL-IR-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
; KERNEL-IR-NEXT: %t0 = zext i32 %1 to i64
|
||||
; KERNEL-IR-NEXT: br label %polly.cond
|
||||
|
||||
; KERNEL-IR-LABEL: polly.cond:
|
||||
; KERNEL-IR-NEXT: %2 = mul nsw i64 32, %b0
|
||||
; KERNEL-IR-NEXT: %3 = add nsw i64 %2, %t0
|
||||
; KERNEL-IR-NEXT: %4 = icmp sle i64 %3, 97
|
||||
; KERNEL-IR-NEXT: br i1 %4, label %polly.then, label %polly.else
|
||||
|
||||
; KERNEL-IR-LABEL: polly.merge:
|
||||
; KERNEL-IR-NEXT: ret void
|
||||
|
||||
; KERNEL-IR-LABEL: polly.then:
|
||||
; KERNEL-IR-NEXT: br label %polly.merge
|
||||
|
||||
; KERNEL-IR-LABEL: polly.else:
|
||||
; KERNEL-IR-NEXT: br label %polly.merge
|
||||
; KERNEL-IR-NEXT: }
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
|
|
@@ -8,13 +8,6 @@
|
|||
; }
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A, i64 %n) {
|
||||
; KERNEL-IR-NEXT: entry:
|
||||
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
|
||||
; KERNEL-IR-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
; KERNEL-IR-NEXT: %t0 = zext i32 %1 to i64
|
||||
; KERNEL-IR-NEXT: ret void
|
||||
; KERNEL-IR-NEXT: }
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
Loading…
Reference in New Issue