CodeGen: Allow Polly to do 'grouped unrolling', but no vector generation.
Grouped unrolling means that we unroll a loop such that the different instances of a certain statement are scheduled right after each other, but we do not generate any vector code. The idea here is that we can schedule the bb vectorizer right afterwards and use its heuristics to decide when vectorization should be performed. llvm-svn: 154251
This commit is contained in:
parent
b9e88d4186
commit
84ecc47e1c
|
@ -84,6 +84,12 @@ Aligned("enable-polly-aligned",
|
|||
cl::value_desc("OpenMP code generation enabled if true"),
|
||||
cl::init(false), cl::ZeroOrMore);
|
||||
|
||||
/// Hidden command-line switch -enable-polly-grouped-unroll (off by default).
///
/// When set, the vector block generator does not emit SIMD instructions.
/// Instead it emits the scalar instances of a statement, one per vector lane,
/// right after each other (see the early-return path in generateLoad), so a
/// pass scheduled afterwards can decide whether vectorization is worthwhile.
static cl::opt<bool>
GroupedUnrolling("enable-polly-grouped-unroll",
                 cl::desc("Perform grouped unrolling, but don't generate SIMD "
                          "instructions"), cl::Hidden, cl::init(false),
                 cl::ZeroOrMore);
|
||||
|
||||
// Map keyed by a (const) Value pointer, yielding another Value. generateLoad
// uses it to record the scalar load generated for an original Load.
typedef DenseMap<const Value*, Value*> ValueMapT;
|
||||
// Map keyed by a C-string pointer, yielding a Value.
// NOTE(review): its users are not visible in this hunk -- confirm intent.
typedef DenseMap<const char*, Value*> CharMapT;
|
||||
// Sequence of ValueMapT; generateLoad indexes it with i in
// [0, getVectorWidth()), i.e. one map per vector lane.
typedef std::vector<ValueMapT> VectorValueMapT;
|
||||
|
@ -676,6 +682,14 @@ void VectorBlockGenerator::generateLoad(const LoadInst *Load,
|
|||
VectorValueMapT &ScalarMaps) {
|
||||
Value *NewLoad;
|
||||
|
||||
if (GroupedUnrolling) {
|
||||
for (int i = 0; i < getVectorWidth(); i++)
|
||||
ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
|
||||
GlobalMaps[i]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
MemoryAccess &Access = Statement.getAccessFor(Load);
|
||||
|
||||
if (Access.isStrideZero(isl_set_copy(Domain)))
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
|
||||
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -enable-polly-grouped-unroll -S %s | FileCheck -check-prefix=UNROLL %s
|
||||
; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -analyze %s | FileCheck -check-prefix=IMPORT %s
|
||||
; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen %s -S -enable-polly-vector | FileCheck -check-prefix=CODEGEN %s
|
||||
; ModuleID = 'simple_vec_stride_one.s'
|
||||
|
@ -50,3 +51,21 @@ define i32 @main() nounwind {
|
|||
; We do not generate optimal loads for this.
|
||||
; CODEGEN: <4 x float>
|
||||
|
||||
|
||||
; UNROLL: %p_scevgep1.moved.to. = getelementptr [1024 x float]* @A, i64 0, i64 0
|
||||
; UNROLL: %p_scevgep1.moved.to.1 = getelementptr [1024 x float]* @A, i64 0, i64 1
|
||||
; UNROLL: %p_scevgep1.moved.to.2 = getelementptr [1024 x float]* @A, i64 0, i64 2
|
||||
; UNROLL: %p_scevgep1.moved.to.3 = getelementptr [1024 x float]* @A, i64 0, i64 3
|
||||
; UNROLL: %p_scevgep.moved.to. = getelementptr [1024 x float]* @B, i64 0, i64 0
|
||||
; UNROLL: %p_scevgep.moved.to.4 = getelementptr [1024 x float]* @B, i64 0, i64 1
|
||||
; UNROLL: %p_scevgep.moved.to.5 = getelementptr [1024 x float]* @B, i64 0, i64 2
|
||||
; UNROLL: %p_scevgep.moved.to.6 = getelementptr [1024 x float]* @B, i64 0, i64 3
|
||||
; UNROLL: %_p_scalar_ = load float* %p_scevgep1.moved.to.
|
||||
; UNROLL: %_p_scalar_7 = load float* %p_scevgep1.moved.to.1
|
||||
; UNROLL: %_p_scalar_8 = load float* %p_scevgep1.moved.to.2
|
||||
; UNROLL: %_p_scalar_9 = load float* %p_scevgep1.moved.to.3
|
||||
; UNROLL: store float %_p_scalar_, float* %p_scevgep.moved.to., align 4
|
||||
; UNROLL: store float %_p_scalar_7, float* %p_scevgep.moved.to.4, align 4
|
||||
; UNROLL: store float %_p_scalar_8, float* %p_scevgep.moved.to.5, align 4
|
||||
; UNROLL: store float %_p_scalar_9, float* %p_scevgep.moved.to.6, align 4
|
||||
; UNROLL: br label %polly.merge_new_and_old
|
||||
|
|
Loading…
Reference in New Issue