CodeGen: Allow Polly to do 'grouped unrolling', but no vector generation.

Grouped unrolling means that we unroll a loop such that the different instances
of a certain statement are scheduled right after each other, but we do
not generate any vector code. The idea here is that we can schedule the
bb vectorizer right afterwards and use its heuristics to decide when
vectorization should be performed.

llvm-svn: 154251
This commit is contained in:
Tobias Grosser 2012-04-07 06:16:08 +00:00
parent b9e88d4186
commit 84ecc47e1c
2 changed files with 33 additions and 0 deletions

View File

@ -84,6 +84,12 @@ Aligned("enable-polly-aligned",
cl::value_desc("OpenMP code generation enabled if true"),
cl::init(false), cl::ZeroOrMore);
// Hidden boolean command-line flag "-enable-polly-grouped-unroll"; defaults to
// false. When it is set, VectorBlockGenerator::generateLoad emits one scalar
// load for every index in [0, getVectorWidth()) and returns, instead of
// going on to its vector-load handling.
static cl::opt<bool>
GroupedUnrolling("enable-polly-grouped-unroll",
                 cl::desc("Perform grouped unrolling, but don't generate SIMD "
                          "instructions"), cl::Hidden, cl::init(false),
                 cl::ZeroOrMore);
// Map keyed by a const Value* and yielding a Value*.
typedef DenseMap<const Value*, Value*> ValueMapT;
// Map keyed by a const char* and yielding a Value*.
typedef DenseMap<const char*, Value*> CharMapT;
// Sequence of ValueMapT. VectorBlockGenerator::generateLoad indexes its
// ScalarMaps argument of this type with i in [0, getVectorWidth()).
typedef std::vector<ValueMapT> VectorValueMapT;
@ -676,6 +682,14 @@ void VectorBlockGenerator::generateLoad(const LoadInst *Load,
VectorValueMapT &ScalarMaps) {
Value *NewLoad;
if (GroupedUnrolling) {
for (int i = 0; i < getVectorWidth(); i++)
ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
GlobalMaps[i]);
return;
}
MemoryAccess &Access = Statement.getAccessFor(Load);
if (Access.isStrideZero(isl_set_copy(Domain)))

View File

@ -1,4 +1,5 @@
; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -enable-polly-grouped-unroll -S %s | FileCheck -check-prefix=UNROLL %s
; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -analyze %s | FileCheck -check-prefix=IMPORT %s
; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen %s -S -enable-polly-vector | FileCheck -check-prefix=CODEGEN %s
; ModuleID = 'simple_vec_stride_one.s'
@ -50,3 +51,21 @@ define i32 @main() nounwind {
; We do not generate optimal loads for this.
; CODEGEN: <4 x float>
; UNROLL: %p_scevgep1.moved.to. = getelementptr [1024 x float]* @A, i64 0, i64 0
; UNROLL: %p_scevgep1.moved.to.1 = getelementptr [1024 x float]* @A, i64 0, i64 1
; UNROLL: %p_scevgep1.moved.to.2 = getelementptr [1024 x float]* @A, i64 0, i64 2
; UNROLL: %p_scevgep1.moved.to.3 = getelementptr [1024 x float]* @A, i64 0, i64 3
; UNROLL: %p_scevgep.moved.to. = getelementptr [1024 x float]* @B, i64 0, i64 0
; UNROLL: %p_scevgep.moved.to.4 = getelementptr [1024 x float]* @B, i64 0, i64 1
; UNROLL: %p_scevgep.moved.to.5 = getelementptr [1024 x float]* @B, i64 0, i64 2
; UNROLL: %p_scevgep.moved.to.6 = getelementptr [1024 x float]* @B, i64 0, i64 3
; UNROLL: %_p_scalar_ = load float* %p_scevgep1.moved.to.
; UNROLL: %_p_scalar_7 = load float* %p_scevgep1.moved.to.1
; UNROLL: %_p_scalar_8 = load float* %p_scevgep1.moved.to.2
; UNROLL: %_p_scalar_9 = load float* %p_scevgep1.moved.to.3
; UNROLL: store float %_p_scalar_, float* %p_scevgep.moved.to., align 4
; UNROLL: store float %_p_scalar_7, float* %p_scevgep.moved.to.4, align 4
; UNROLL: store float %_p_scalar_8, float* %p_scevgep.moved.to.5, align 4
; UNROLL: store float %_p_scalar_9, float* %p_scevgep.moved.to.6, align 4
; UNROLL: br label %polly.merge_new_and_old