GPGPU: Generate an AST for the GPU-mapped schedule

For this we need to provide ppcg with an explicit list of statements as they
occur in the polly::Scop.

We also set up basic AST printing facilities to facilitate debugging. To allow
code reuse, some (minor) changes in ppcg have been necessary.

llvm-svn: 275436
Tobias Grosser 2016-07-14 15:51:37 +00:00
parent 60c6002570
commit 69b4675180
6 changed files with 161 additions and 14 deletions
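
The "basic AST printing facilities" mentioned above boil down to a small isl idiom: render an isl_ast_node as C code into a string buffer. A minimal, self-contained sketch of that idiom (not code from this commit; the helper name printASTAsC is made up for illustration):

#include <cstdlib>
#include <string>
#include "isl/ast.h"
#include "isl/printer.h"

// Render an isl AST (e.g. the one produced by ppcg's generate_code()) as
// C code and return it as a std::string.
static std::string printASTAsC(isl_ctx *Ctx, __isl_keep isl_ast_node *Tree) {
  isl_printer *P = isl_printer_to_str(Ctx);
  P = isl_printer_set_output_format(P, ISL_FORMAT_C);
  isl_ast_print_options *Options = isl_ast_print_options_alloc(Ctx);
  P = isl_ast_node_print(Tree, P, Options); // consumes Options
  char *Str = isl_printer_get_str(P);
  std::string Result(Str ? Str : "");
  free(Str);
  isl_printer_free(P);
  return Result;
}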

View File

@@ -26,8 +26,11 @@
#include "isl/union_map.h"
extern "C" {
#include "cuda.h"
#include "gpu.h"
#include "gpu_print.h"
#include "ppcg.h"
#include "schedule.h"
}
#include "llvm/Support/Debug.h"
@@ -41,6 +44,12 @@ static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule",
cl::desc("Dump the computed GPU Schedule"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
static cl::opt<bool>
DumpCode("polly-acc-dump-code",
cl::desc("Dump C code describing the GPU mapping"), cl::Hidden,
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
/// Create the ast expressions for a ScopStmt.
///
/// This function is a callback to generate the ast expressions for each
@@ -256,6 +265,33 @@ public:
return PPCGScop;
}
/// Collect the list of GPU statements.
///
/// Each statement has an id, a pointer to the underlying data structure,
/// as well as a list of all memory accesses.
///
/// TODO: Initialize the list of memory accesses.
///
/// @returns An array of statements.
gpu_stmt *getStatements() {
gpu_stmt *Stmts = isl_calloc_array(S->getIslCtx(), struct gpu_stmt,
std::distance(S->begin(), S->end()));
int i = 0;
for (auto &Stmt : *S) {
gpu_stmt *GPUStmt = &Stmts[i];
GPUStmt->id = Stmt.getDomainId();
// We use the pet stmt pointer to keep track of the Polly statements.
GPUStmt->stmt = (pet_stmt *)&Stmt;
GPUStmt->accesses = nullptr;
i++;
}
return Stmts;
}
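
Because no real pet_stmt is ever attached, a later consumer that needs the Polly statement back simply reverses this cast. A hypothetical helper (not part of this commit; it assumes the includes already present in this file, i.e. gpu.h for gpu_stmt and Polly's ScopInfo header for ScopStmt):

static polly::ScopStmt *getPollyStmt(gpu_stmt *GPUStmt) {
  // getStatements() stores a polly::ScopStmt * in the pet_stmt field, so this
  // reverse cast is only valid for gpu_stmt objects built by getStatements().
  return reinterpret_cast<polly::ScopStmt *>(GPUStmt->stmt);
}
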
/// Create a default-initialized PPCG GPU program.
///
/// @returns A new gpu program description.
@@ -278,14 +314,90 @@ public:
PPCGProg->to_inner = nullptr;
PPCGProg->any_to_outer = nullptr;
PPCGProg->array_order = nullptr;
- PPCGProg->n_stmts = 0;
- PPCGProg->stmts = nullptr;
+ PPCGProg->n_stmts = std::distance(S->begin(), S->end());
+ PPCGProg->stmts = getStatements();
PPCGProg->n_array = 0;
PPCGProg->array = nullptr;
return PPCGProg;
}
struct PrintGPUUserData {
struct cuda_info *CudaInfo;
struct gpu_prog *PPCGProg;
std::vector<ppcg_kernel *> Kernels;
};
/// Print a user statement node in the host code.
///
/// We use ppcg's printing facilities to print the actual statement and
/// additionally build up a list of all kernels that are encountered in the
/// host ast.
///
/// @param P The printer to print to
/// @param Options The printing options to use
/// @param Node The node to print
/// @param User A user pointer to carry additional data. This pointer is
/// expected to be of type PrintGPUUserData.
///
/// @returns A printer to which the output has been printed.
static __isl_give isl_printer *
printHostUser(__isl_take isl_printer *P,
__isl_take isl_ast_print_options *Options,
__isl_take isl_ast_node *Node, void *User) {
auto Data = (struct PrintGPUUserData *)User;
auto Id = isl_ast_node_get_annotation(Node);
if (Id) {
auto Kernel = (struct ppcg_kernel *)isl_id_get_user(Id);
isl_id_free(Id);
Data->Kernels.push_back(Kernel);
}
return print_host_user(P, Options, Node, User);
}
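
printHostUser depends on ppcg having attached each ppcg_kernel to its launch node as an isl_id annotation. The following self-contained sketch shows that round trip in isolation (the attach side happens inside ppcg, not in this commit; both helper names and the id name "kernel" are illustrative):

#include "isl/ast.h"
#include "isl/id.h"

// Attach an opaque kernel pointer to an AST node via an isl_id annotation.
static __isl_give isl_ast_node *annotateWithKernel(__isl_take isl_ast_node *Node,
                                                   isl_ctx *Ctx, void *Kernel) {
  isl_id *Id = isl_id_alloc(Ctx, "kernel", Kernel);
  return isl_ast_node_set_annotation(Node, Id);
}

// Retrieve the kernel pointer again, mirroring what printHostUser does above.
static void *getAnnotatedKernel(__isl_keep isl_ast_node *Node) {
  isl_id *Id = isl_ast_node_get_annotation(Node);
  if (!Id)
    return nullptr;
  void *Kernel = isl_id_get_user(Id);
  isl_id_free(Id);
  return Kernel;
}
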
/// Print C code corresponding to the control flow in @p Kernel.
///
/// @param Kernel The kernel to print
void printKernel(ppcg_kernel *Kernel) {
auto *P = isl_printer_to_str(S->getIslCtx());
P = isl_printer_set_output_format(P, ISL_FORMAT_C);
auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
P = isl_ast_node_print(Kernel->tree, P, Options);
char *String = isl_printer_get_str(P);
printf("%s\n", String);
free(String);
isl_printer_free(P);
}
/// Print C code corresponding to the GPU code described by @p Tree.
///
/// @param Tree An AST describing GPU code
/// @param PPCGProg The PPCG program from which @p Tree has been constructed.
void printGPUTree(isl_ast_node *Tree, gpu_prog *PPCGProg) {
auto *P = isl_printer_to_str(S->getIslCtx());
P = isl_printer_set_output_format(P, ISL_FORMAT_C);
PrintGPUUserData Data;
Data.PPCGProg = PPCGProg;
auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
Options =
isl_ast_print_options_set_print_user(Options, printHostUser, &Data);
P = isl_ast_node_print(Tree, P, Options);
char *String = isl_printer_get_str(P);
printf("# host\n");
printf("%s\n", String);
free(String);
isl_printer_free(P);
for (auto Kernel : Data.Kernels) {
printf("# kernel%d\n", Kernel->id);
printKernel(Kernel);
}
}
// Generate a GPU program using PPCG.
//
// GPU mapping consists of multiple steps:
@@ -322,10 +434,12 @@ public:
int has_permutable = has_any_permutable_node(Schedule);
- if (!has_permutable || has_permutable < 0)
+ if (!has_permutable || has_permutable < 0) {
Schedule = isl_schedule_free(Schedule);
- else
+ } else {
Schedule = map_to_device(PPCGGen, Schedule);
+ PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
+ }
if (DumpSchedule) {
isl_printer *P = isl_printer_to_str(S->getIslCtx());
@@ -341,6 +455,15 @@ public:
isl_printer_free(P);
}
if (DumpCode) {
printf("Code\n");
printf("====\n");
if (PPCGGen->tree)
printGPUTree(PPCGGen->tree, PPCGProg);
else
printf("No code generated\n");
}
isl_schedule_free(Schedule);
return PPCGGen;

View File

@@ -153,20 +153,20 @@ static __isl_give isl_printer *copy_array_from_device(
return p;
}
- static void print_reverse_list(FILE *out, int len, int *list)
+ static isl_printer *print_reverse_list(isl_printer *p, int len, int *list)
{
int i;
if (len == 0)
- return;
+ return p;
- fprintf(out, "(");
+ p = isl_printer_print_str(p, "(");
for (i = 0; i < len; ++i) {
if (i)
- fprintf(out, ", ");
- fprintf(out, "%d", list[len - 1 - i]);
+ p = isl_printer_print_str(p, ", ");
+ p = isl_printer_print_int(p, list[len - 1 - i]);
}
- fprintf(out, ")");
+ return isl_printer_print_str(p, ")");
}
/* Print the effective grid size as a list of the sizes in each
@@ -534,7 +534,7 @@ struct print_host_user_data {
* In case of a kernel launch, print a block of statements that
* defines the grid and the block and then launches the kernel.
*/
- static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
+ __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
__isl_take isl_ast_print_options *print_options,
__isl_keep isl_ast_node *node, void *user)
{
@@ -569,8 +569,7 @@ static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
p = isl_printer_print_str(p, "dim3 k");
p = isl_printer_print_int(p, kernel->id);
p = isl_printer_print_str(p, "_dimBlock");
- print_reverse_list(isl_printer_get_file(p),
- kernel->n_block, kernel->block_dim);
+ p = print_reverse_list(p, kernel->n_block, kernel->block_dim);
p = isl_printer_print_str(p, ";");
p = isl_printer_end_line(p);
@@ -600,7 +599,9 @@ static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
p = isl_printer_start_line(p);
p = isl_printer_end_line(p);
+ #if 0
print_kernel(data->prog, kernel, data->cuda);
+ #endif
return p;
}

View File

@@ -6,5 +6,8 @@
int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
const char *input);
__isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
__isl_take isl_ast_print_options *print_options,
__isl_keep isl_ast_node *node, void *user);
#endif

View File

@@ -2297,7 +2297,7 @@ static isl_bool update_depth(__isl_keep isl_schedule_node *node, void *user)
* The ASTs for the device code are embedded in ppcg_kernel objects
* attached to the leaf nodes that call "kernel".
*/
- static __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
+ __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
__isl_take isl_schedule *schedule)
{
struct ppcg_at_domain_data data;

View File

@@ -367,4 +367,6 @@ __isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
int has_any_permutable_node(__isl_keep isl_schedule *schedule);
__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
__isl_take isl_schedule *schedule);
__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
__isl_take isl_schedule *schedule);
#endif
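
With generate_code exported here, next to the previously available get_schedule, has_any_permutable_node and map_to_device, the host-side driver in Polly reduces to roughly the following condensed sketch (a restatement of the generateGPU() changes above; dumping and error handling omitted, the function name buildGPUAST is illustrative):

extern "C" {
#include "gpu.h"
#include "ppcg.h"
}
#include "isl/schedule.h"

// Chain the exported ppcg entry points: schedule, map to device, build AST.
static void buildGPUAST(gpu_gen *PPCGGen) {
  isl_schedule *Schedule = get_schedule(PPCGGen);
  // Only schedules with at least one permutable band are mapped to the GPU.
  if (has_any_permutable_node(Schedule) > 0) {
    Schedule = map_to_device(PPCGGen, Schedule);
    PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
  }
  isl_schedule_free(Schedule);
}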

View File

@@ -3,6 +3,10 @@
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=SCHED %s
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s
; REQUIRES: pollyacc
; CHECK: Stmt_bb5
@@ -44,6 +48,20 @@
; SCHED: coincident: [ 1, 1 ]
; SCHED: - filter: "{ }"
; CODE: Code
; CODE: ====
; CODE: # host
; CODE: {
; CODE: dim3 k0_dimBlock(16, 32);
; CODE: dim3 k0_dimGrid(32, 32);
; CODE: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE: }
; CODE: # kernel0
; CODE: for (int c3 = 0; c3 <= 1; c3 += 1)
; CODE: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
; void double_parallel_loop(float A[][1024]) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)