GPGPU: Handle scalar array references
Pass the content of scalar array references to the alloca on the kernel side and do not pass them additionally as normal LLVM scalar values. llvm-svn: 277699
This commit is contained in:
parent
3216f8546c
commit
00bb5a99f5
|
@ -37,7 +37,20 @@ struct SubtreeReferences {
|
|||
BlockGenerator &BlockGen;
|
||||
};
|
||||
|
||||
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr);
|
||||
/// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
|
||||
///
|
||||
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
|
||||
/// statement and the base pointers of the memory accesses. For scalar
|
||||
/// statements we force the generation of alloca memory locations and list
|
||||
/// these locations in the set of out-of-scop values as well.
|
||||
///
|
||||
/// @param Stmt The statement for which to extract the information.
|
||||
/// @param UserPtr A void pointer that can be cast to a
|
||||
/// SubtreeReferences structure.
|
||||
/// @param CreateScalarRefs Should the result include allocas of scalar
|
||||
/// references?
|
||||
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
|
||||
bool CreateScalarRefs = true);
|
||||
|
||||
class IslNodeBuilder {
|
||||
public:
|
||||
|
|
|
@ -193,17 +193,8 @@ static int findReferencesInBlock(struct SubtreeReferences &References,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
|
||||
///
|
||||
/// This includes the SCEVUnknowns referenced by the SCEVs used in the
|
||||
/// statement and the base pointers of the memory accesses. For scalar
|
||||
/// statements we force the generation of alloca memory locations and list
|
||||
/// these locations in the set of out-of-scop values as well.
|
||||
///
|
||||
/// @param Stmt The statement for which to extract the information.
|
||||
/// @param UserPtr A void pointer that can be cast to a SubtreeReferences
|
||||
/// structure.
|
||||
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr) {
|
||||
isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
|
||||
bool CreateScalarRefs) {
|
||||
auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
|
||||
|
||||
if (Stmt->isBlockStmt())
|
||||
|
@ -226,7 +217,8 @@ isl_stat addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr) {
|
|||
continue;
|
||||
}
|
||||
|
||||
References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
|
||||
if (CreateScalarRefs)
|
||||
References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access));
|
||||
}
|
||||
|
||||
return isl_stat_ok;
|
||||
|
|
|
@ -242,6 +242,12 @@ private:
|
|||
/// @param Array The array for which to compute a size.
|
||||
Value *getArraySize(gpu_array_info *Array);
|
||||
|
||||
/// Prepare the kernel arguments for kernel code generation
|
||||
///
|
||||
/// @param Kernel The kernel to generate code for.
|
||||
/// @param FN The function created for the kernel.
|
||||
void prepareKernelArguments(ppcg_kernel *Kernel, Function *FN);
|
||||
|
||||
/// Create kernel function.
|
||||
///
|
||||
/// Create a kernel function located in a newly created module that can serve
|
||||
|
@ -775,7 +781,7 @@ isl_bool collectReferencesInGPUStmt(__isl_keep isl_ast_node *Node, void *User) {
|
|||
auto Stmt = (ScopStmt *)KernelStmt->u.d.stmt->stmt;
|
||||
isl_id_free(Id);
|
||||
|
||||
addReferencesFromStmt(Stmt, User);
|
||||
addReferencesFromStmt(Stmt, User, false /* CreateScalarRefs */);
|
||||
|
||||
return isl_bool_true;
|
||||
}
|
||||
|
@ -1167,6 +1173,32 @@ void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Copy the values of zero-dimensional (scalar) kernel arrays from the kernel
/// function's arguments into scalar allocas.
///
/// Walks the kernel's arrays in index order while stepping through the
/// arguments of @p FN. Arrays that do not require a kernel argument are
/// skipped without consuming an argument. Arrays with one or more dimensions
/// consume an argument but are otherwise left untouched. For a
/// zero-dimensional array, the argument is cast to a pointer to the array's
/// element type, the value is loaded through it, and the loaded value is
/// stored into the alloca obtained from BlockGen for the array's base pointer.
///
/// @param Kernel The kernel whose arrays are inspected.
/// @param FN     The function whose arguments are read.
void GPUNodeBuilder::prepareKernelArguments(ppcg_kernel *Kernel, Function *FN) {
|
||||
// Iterator over the arguments of FN; advanced once per array that requires
// a kernel argument.
auto Arg = FN->arg_begin();
|
||||
for (long i = 0; i < Kernel->n_array; i++) {
|
||||
// Arrays without a kernel argument do not advance Arg.
if (!ppcg_kernel_requires_array_argument(Kernel, i))
|
||||
continue;
|
||||
|
||||
// Look up the ScopArrayInfo through the tuple id of the array's space.
// A copy of the id is handed to getFromId and the id obtained here is freed
// right after (NOTE(review): presumably getFromId consumes its argument —
// confirm against its declaration).
isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set);
|
||||
const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl_id_copy(Id));
|
||||
isl_id_free(Id);
|
||||
|
||||
// Arrays with at least one dimension only consume their argument.
if (SAI->getNumberOfDimensions() > 0) {
|
||||
Arg++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Zero-dimensional array: load the value through the argument pointer (cast
// to a pointer to the element type) and store it into the scalar alloca.
Value *Alloca = BlockGen.getOrCreateScalarAlloca(SAI->getBasePtr());
|
||||
Value *ArgPtr = &*Arg;
|
||||
Type *TypePtr = SAI->getElementType()->getPointerTo();
|
||||
Value *TypedArgPtr = Builder.CreatePointerCast(ArgPtr, TypePtr);
|
||||
Value *Val = Builder.CreateLoad(TypedArgPtr);
|
||||
Builder.CreateStore(Val, Alloca);
|
||||
|
||||
Arg++;
|
||||
}
|
||||
}
|
||||
|
||||
void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
|
||||
SetVector<Value *> &SubtreeValues) {
|
||||
|
||||
|
@ -1189,6 +1221,7 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel,
|
|||
|
||||
ScopDetection::markFunctionAsInvalid(FN);
|
||||
|
||||
prepareKernelArguments(Kernel, FN);
|
||||
insertKernelIntrinsics(Kernel);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,10 @@
|
|||
; RUN: -S < %s | \
|
||||
; RUN: FileCheck -check-prefix=IR %s
|
||||
|
||||
; RUN: opt %loadPolly -polly-codegen-ppcg \
|
||||
; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \
|
||||
; RUN: FileCheck -check-prefix=KERNEL %s
|
||||
|
||||
; REQUIRES: pollyacc
|
||||
|
||||
; CODE: Code
|
||||
|
@ -58,6 +62,8 @@ bb7: ; preds = %bb1
|
|||
ret void
|
||||
}
|
||||
|
||||
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i8* %MemRef_b)
|
||||
|
||||
; CODE: Code
|
||||
; CODE-NEXT: ====
|
||||
; CODE-NEXT: # host
|
||||
|
@ -108,6 +114,13 @@ bb7: ; preds = %bb1
|
|||
ret void
|
||||
}
|
||||
|
||||
; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A, i8* %MemRef_b)
|
||||
; KERNEL-NEXT: entry:
|
||||
; KERNEL-NEXT: %b.s2a = alloca float
|
||||
; KERNEL-NEXT: %0 = bitcast i8* %MemRef_b to float*
|
||||
; KERNEL-NEXT: %1 = load float, float* %0
|
||||
; KERNEL-NEXT: store float %1, float* %b.s2a
|
||||
|
||||
; CODE: Code
|
||||
; CODE-NEXT: ====
|
||||
; CODE-NEXT: # host
|
||||
|
|
Loading…
Reference in New Issue