Really do not unroll the vector loop in combination with register tiling

The previous commit lacked a test case for register tiling + pre-vectorization,
and we obviously got it wrong right away.

llvm-svn: 245599
This commit is contained in:
Tobias Grosser 2015-08-20 19:08:16 +00:00
parent d83b8a83ec
commit fc490a99f5
2 changed files with 24 additions and 4 deletions

View File

@ -306,12 +306,11 @@ IslScheduleOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
Node = isl_schedule_node_band_tile(Node, Sizes);
Node = isl_schedule_node_child(Node, 0);
Node = isl_schedule_node_band_sink(Node);
// Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
// we will have trouble matching it in the backend.
Node = isl_schedule_node_band_set_ast_build_options(
Node, isl_union_set_read_from_str(Ctx, "{unroll[x]: 1 = 0}"));
Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
Node = isl_schedule_node_band_sink(Node);
Node = isl_schedule_node_child(Node, 0);
return Node;
}

View File

@ -14,6 +14,14 @@
; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-opt-isl -analyze \
; RUN: -polly-2nd-level-tiling -polly-ast \
; RUN: -polly-tile-sizes=256,16 -polly-no-early-exit \
; RUN: -polly-register-tiling -polly-register-tile-sizes=2,4 \
; RUN: -polly-vectorizer=polly \
; RUN: -polly-2nd-level-tile-sizes=16,8 < %s | \
; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER-PLUS-VECTORIZATION
; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 31; c1 += 1)
; CHECK: for (int c2 = 0; c2 <= 255; c2 += 1)
@ -46,7 +54,20 @@
; TWO-PLUS-REGISTER: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
; TWO-PLUS-REGISTER: }
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c1 = 0; c1 <= 31; c1 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c2 = 0; c2 <= 15; c2 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c3 = 0; c3 <= 1; c3 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c4 = 0; c4 <= 7; c4 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c5 = 0; c5 <= 1; c5 += 1) {
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"