Really do not unroll the vector loop in combination with register tiling
The previous commit lacked a test case for register tiling + pre-vectorization, and, unsurprisingly, we immediately got it wrong. llvm-svn: 245599
This commit is contained in:
parent
d83b8a83ec
commit
fc490a99f5
|
@ -306,12 +306,11 @@ IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
|
|||
isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
|
||||
Node = isl_schedule_node_band_tile(Node, Sizes);
|
||||
Node = isl_schedule_node_child(Node, 0);
|
||||
Node = isl_schedule_node_band_sink(Node);
|
||||
|
||||
// Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
|
||||
// we will have trouble matching it in the backend.
|
||||
Node = isl_schedule_node_band_set_ast_build_options(
|
||||
Node, isl_union_set_read_from_str(Ctx, "{unroll[x]: 1 = 0}"));
|
||||
Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
|
||||
Node = isl_schedule_node_band_sink(Node);
|
||||
Node = isl_schedule_node_child(Node, 0);
|
||||
return Node;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,14 @@
|
|||
; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER
|
||||
|
||||
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-opt-isl -analyze \
|
||||
; RUN: -polly-2nd-level-tiling -polly-ast \
|
||||
; RUN: -polly-tile-sizes=256,16 -polly-no-early-exit \
|
||||
; RUN: -polly-register-tiling -polly-register-tile-sizes=2,4 \
|
||||
; RUN: -polly-vectorizer=polly \
|
||||
; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER-PLUS-VECTORIZATION
|
||||
|
||||
; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1)
|
||||
; CHECK: for (int c1 = 0; c1 <= 31; c1 += 1)
|
||||
; CHECK: for (int c2 = 0; c2 <= 255; c2 += 1)
|
||||
|
@ -46,7 +54,20 @@
|
|||
; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
|
||||
; TWO-PLUS-REGISTER: }
|
||||
|
||||
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1)
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c1 = 0; c1 <= 31; c1 += 1)
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c2 = 0; c2 <= 15; c2 += 1)
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c3 = 0; c3 <= 1; c3 += 1)
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c4 = 0; c4 <= 7; c4 += 1)
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c5 = 0; c5 <= 1; c5 += 1) {
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
|
||||
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: }
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
|
||||
|
|
Loading…
Reference in New Issue