GPGPU: Handle scalar array references

Pass the content of scalar array references to the alloca on the kernel side
and do not additionally pass them as normal LLVM scalar values.

llvm-svn: 277699
This commit is contained in:
Tobias Grosser 2016-08-04 06:55:59 +00:00
parent 3216f8546c
commit 00bb5a99f5
4 changed files with 65 additions and 14 deletions

View File

@ -37,7 +37,20 @@ struct SubtreeReferences {
BlockGenerator &BlockGen;
};
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr);
/// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
///
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
/// statement and the base pointers of the memory accesses. For scalar
/// statements we force the generation of alloca memory locations and list
/// these locations in the set of out-of-scop values as well, unless
/// @p CreateScalarRefs is false.
///
/// @param Stmt The statement for which to extract the information.
/// @param UserPtr A void pointer that can be cast to a
/// SubtreeReferences structure.
/// @param CreateScalarRefs Should the result include allocas of scalar
/// references? Defaults to true.
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
bool CreateScalarRefs = true);
class IslNodeBuilder {
public:

View File

@ -193,17 +193,8 @@ static int findReferencesInBlock(struct SubtreeReferences &References,
return 0;
}
/// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
///
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
/// statement and the base pointers of the memory accesses. For scalar
/// statements we force the generation of alloca memory locations and list
/// these locations in the set of out-of-scop values as well.
///
/// @param Stmt The statement for which to extract the information.
/// @param UserPtr A void pointer that can be casted to a SubtreeReferences
/// structure.
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr) {
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
bool CreateScalarRefs) {
auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
if (Stmt->isBlockStmt())
@ -226,7 +217,8 @@ isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr) {
continue;
}
References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
if (CreateScalarRefs)
References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
}
return isl_stat_ok;

View File

@ -242,6 +242,12 @@ private:
/// @param Array The array for which to compute a size.
Value *getArraySize(gpu_array_info *Array);
/// Prepare the kernel arguments for kernel code generation.
///
/// For each zero-dimensional (scalar) array that is passed to the kernel,
/// the value is loaded through the corresponding pointer argument of @p FN
/// and stored into the alloca of that scalar.
///
/// @param Kernel The kernel to generate code for.
/// @param FN The function created for the kernel.
void prepareKernelArguments(ppcg_kernel *Kernel, Function *FN);
/// Create kernel function.
///
/// Create a kernel function located in a newly created module that can serve
@ -775,7 +781,7 @@ isl_bool collectReferencesInGPUStmt(__isl_keep isl_ast_node *Node, void *User) {
auto Stmt = (ScopStmt *)KernelStmt->u.d.stmt->stmt;
isl_id_free(Id);
addReferencesFromStmt(Stmt, User);
addReferencesFromStmt(Stmt, User, false /* CreateScalarRefs */);
return isl_bool_true;
}
@ -1167,6 +1173,32 @@ void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) {
}
}
/// Copy scalar kernel arguments into their allocas.
///
/// Walks the arrays of @p Kernel in index order while stepping through the
/// arguments of @p FN: every array that requires an array argument consumes
/// exactly one argument. For arrays with zero dimensions, the argument is
/// cast to a pointer to the array's element type, the value behind it is
/// loaded, and that value is stored into the alloca obtained from the block
/// generator for the array's base pointer.
///
/// @param Kernel The kernel to generate code for.
/// @param FN     The function created for the kernel.
void GPUNodeBuilder::prepareKernelArguments(ppcg_kernel *Kernel, Function *FN) {
  auto ArgIt = FN->arg_begin();

  for (long ArrayIdx = 0; ArrayIdx < Kernel->n_array; ++ArrayIdx) {
    // Arrays that need no array argument do not consume an argument slot.
    if (!ppcg_kernel_requires_array_argument(Kernel, ArrayIdx))
      continue;

    isl_id *ArrayId =
        isl_space_get_tuple_id(Prog->array[ArrayIdx].space, isl_dim_set);
    const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl_id_copy(ArrayId));
    isl_id_free(ArrayId);

    // Only zero-dimensional arrays are copied into an alloca; arrays with
    // dimensions merely advance to the next argument.
    const bool HasDimensions = SAI->getNumberOfDimensions() > 0;
    if (!HasDimensions) {
      // Request the alloca first so the call order of the IR-building
      // helpers stays: alloca, pointer cast, load, store.
      Value *ScalarSlot = BlockGen.getOrCreateScalarAlloca(SAI->getBasePtr());
      Value *RawArg = &*ArgIt;
      Type *ElementPtrTy = SAI->getElementType()->getPointerTo();
      Value *TypedArg = Builder.CreatePointerCast(RawArg, ElementPtrTy);
      Value *Loaded = Builder.CreateLoad(TypedArg);
      Builder.CreateStore(Loaded, ScalarSlot);
    }

    ++ArgIt;
  }
}
void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
SetVector<Value *> &SubtreeValues) {
@ -1189,6 +1221,7 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
ScopDetection::markFunctionAsInvalid(FN);
prepareKernelArguments(Kernel, FN);
insertKernelIntrinsics(Kernel);
}

View File

@ -6,6 +6,10 @@
; RUN: -S < %s | \
; RUN: FileCheck -check-prefix=IR %s
; RUN: opt %loadPolly -polly-codegen-ppcg \
; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \
; RUN: FileCheck -check-prefix=KERNEL %s
; REQUIRES: pollyacc
; CODE: Code
@ -58,6 +62,8 @@ bb7: ; preds = %bb1
ret void
}
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i8* %MemRef_b)
; CODE: Code
; CODE-NEXT: ====
; CODE-NEXT: # host
@ -108,6 +114,13 @@ bb7: ; preds = %bb1
ret void
}
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i8* %MemRef_b)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %b.s2a = alloca float
; KERNEL-NEXT: %0 = bitcast i8* %MemRef_b to float*
; KERNEL-NEXT: %1 = load float, float* %0
; KERNEL-NEXT: store float %1, float* %b.s2a
; CODE: Code
; CODE-NEXT: ====
; CODE-NEXT: # host