[GPGPU] Do not create copy statements when targeting managed memory
Summary: When targeting managed memory, the host-to-device and device-to-host copy statements are not used and consequently do not even need to be computed. This reduces the overall compile time for our kernel from 1m33s to 17s.
Reviewers: Meinersbur, bollu, singam-sanjay
Reviewed By: bollu
Subscribers: nemanjai, pollydev, llvm-commits, kbarton
Tags: #polly
Differential Revision: https://reviews.llvm.org/D36868
llvm-svn: 311157
This commit is contained in:
parent
7e680f15a5
commit
861a387fac
|
@ -3140,7 +3140,8 @@ public:
|
|||
DEBUG(dbgs() << getUniqueScopName(S)
|
||||
<< " does not have permutable bands. Bailing out\n";);
|
||||
} else {
|
||||
Schedule = map_to_device(PPCGGen, Schedule);
|
||||
const bool CreateTransferToFromDevice = !PollyManagedMemory;
|
||||
Schedule = map_to_device(PPCGGen, Schedule, CreateTransferToFromDevice);
|
||||
PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
|
||||
}
|
||||
|
||||
|
|
|
@ -5277,7 +5277,7 @@ static __isl_give isl_schedule_node *add_init_clear_device(
|
|||
* around the entire schedule.
|
||||
*/
|
||||
__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
|
||||
__isl_take isl_schedule *schedule)
|
||||
__isl_take isl_schedule *schedule, int to_from_device)
|
||||
{
|
||||
isl_schedule_node *node;
|
||||
isl_set *context;
|
||||
|
@ -5309,7 +5309,12 @@ __isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
|
|||
prefix = isl_union_map_preimage_domain_union_pw_multi_aff(prefix,
|
||||
contraction);
|
||||
node = mark_kernels(gen, node);
|
||||
node = add_to_from_device(node, domain, prefix, gen->prog);
|
||||
if (to_from_device) {
|
||||
node = add_to_from_device(node, domain, prefix, gen->prog);
|
||||
} else {
|
||||
isl_union_set_free(domain);
|
||||
isl_union_map_free(prefix);
|
||||
}
|
||||
node = isl_schedule_node_root(node);
|
||||
node = isl_schedule_node_child(node, 0);
|
||||
node = isl_schedule_node_child(node, 0);
|
||||
|
@ -5675,7 +5680,8 @@ static __isl_give isl_printer *generate(__isl_take isl_printer *p,
|
|||
p = print_cpu(p, scop, options);
|
||||
isl_schedule_free(schedule);
|
||||
} else {
|
||||
schedule = map_to_device(gen, schedule);
|
||||
const int create_to_from_device = 1;
|
||||
schedule = map_to_device(gen, schedule, create_to_from_device);
|
||||
gen->tree = generate_code(gen, schedule);
|
||||
p = ppcg_set_macro_names(p);
|
||||
p = ppcg_print_exposed_declarations(p, prog->scop);
|
||||
|
|
|
@ -447,7 +447,8 @@ __isl_give isl_schedule_node *gpu_create_kernel(struct gpu_gen *gen,
|
|||
__isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
|
||||
int has_any_permutable_node(__isl_keep isl_schedule *schedule);
|
||||
__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
|
||||
__isl_take isl_schedule *schedule);
|
||||
__isl_take isl_schedule *schedule,
|
||||
int to_from_device);
|
||||
__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
|
||||
__isl_take isl_schedule *schedule);
|
||||
|
||||
|
|
Loading…
Reference in New Issue