LoopVectorize: Allow vectorization of loops with lifetime markers

Patch by Marc Jessome!

llvm-svn: 187825
This commit is contained in:
Arnold Schwaighofer 2013-08-06 22:37:52 +00:00
parent e2a39e7532
commit a7cd6bf3bb
3 changed files with 117 additions and 8 deletions

View File

@ -452,6 +452,9 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::pow: ISD = ISD::FPOW; break;
case Intrinsic::fma: ISD = ISD::FMA; break;
case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return 0;
}
const TargetLoweringBase *TLI = getTLI();

View File

@ -1775,6 +1775,8 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return II->getIntrinsicID();
default:
return Intrinsic::not_intrinsic;
@ -2491,6 +2493,12 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
CallInst *CI = cast<CallInst>(it);
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
assert(ID && "Not an intrinsic call!");
switch (ID) {
case Intrinsic::lifetime_end:
case Intrinsic::lifetime_start:
scalarizeInstruction(it);
break;
default:
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
@ -2503,6 +2511,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
}
break;
}
break;
}
default:
// All other instructions are unsupported. Scalarize them.

View File

@ -0,0 +1,96 @@
; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Make sure we can vectorize loops which contain lifetime markers.
; @test: baseline case. The i8* view of %arr (%0) is computed once in the
; entry block and reused by every lifetime marker, so the marker operand is
; defined outside the loop.
; The checks below expect the same order as the scalar loop body: the end
; marker, then the widened <2 x i32> store, then the start marker.
; The label is anchored as "@test(" because the bare word "test" is also a
; substring of the two longer function names in this file.
; CHECK-LABEL: @test(
; CHECK: call void @llvm.lifetime.end
; CHECK: store <2 x i32>
; CHECK: call void @llvm.lifetime.start
define void @test(i32 *%d) {
entry:
; 1024 x i32 is 4096 bytes, which is the size passed to each marker.
%arr = alloca [1024 x i32], align 16
%0 = bitcast [1024 x i32]* %arr to i8*
call void @llvm.lifetime.start(i64 4096, i8* %0) #1
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; End the lifetime of %arr at the top of each iteration ...
call void @llvm.lifetime.end(i64 4096, i8* %0) #1
%arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
; The loaded value is not used by any later instruction.
%1 = load i32* %arrayidx, align 8
; This store to consecutive elements of %d is the access the test expects to
; be widened.
store i32 100, i32* %arrayidx, align 8
; ... and restart it before the iteration ends.
call void @llvm.lifetime.start(i64 4096, i8* %0) #1
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
; The loop exits once the incremented counter equals 128, i.e. after 128
; iterations.
%exitcond = icmp ne i32 %lftr.wideiv, 128
br i1 %exitcond, label %for.body, label %for.end
for.end:
call void @llvm.lifetime.end(i64 4096, i8* %0) #1
ret void
}
; @testbitcast: same loop as the baseline case, except that the i8* cast fed
; to the in-loop markers (%1) is itself an instruction inside the loop body.
; The markers in the entry and exit blocks still use %0, the cast made in the
; entry block.
; The label is anchored on the full "@name(" form so that it pins the function
; definition rather than any substring occurrence of the name.
; CHECK-LABEL: @testbitcast(
; CHECK: call void @llvm.lifetime.end
; CHECK: store <2 x i32>
; CHECK: call void @llvm.lifetime.start
define void @testbitcast(i32 *%d) {
entry:
; 1024 x i32 is 4096 bytes, which is the size passed to each marker.
%arr = alloca [1024 x i32], align 16
%0 = bitcast [1024 x i32]* %arr to i8*
call void @llvm.lifetime.start(i64 4096, i8* %0) #1
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; The cast is recomputed on every iteration; its operand %arr does not depend
; on the induction variable.
%1 = bitcast [1024 x i32]* %arr to i8*
call void @llvm.lifetime.end(i64 4096, i8* %1) #1
%arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
; The loaded value is not used by any later instruction.
%2 = load i32* %arrayidx, align 8
; This store to consecutive elements of %d is the access the test expects to
; be widened.
store i32 100, i32* %arrayidx, align 8
call void @llvm.lifetime.start(i64 4096, i8* %1) #1
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
; The loop exits once the incremented counter equals 128, i.e. after 128
; iterations.
%exitcond = icmp ne i32 %lftr.wideiv, 128
br i1 %exitcond, label %for.body, label %for.end
for.end:
call void @llvm.lifetime.end(i64 4096, i8* %0) #1
ret void
}
; @testloopvariant: the loop body additionally computes an address into %arr
; that is indexed by the induction variable (%0). There are no lifetime
; markers outside the loop in this function.
; The label is anchored on the full "@name(" form so that it pins the function
; definition rather than any substring occurrence of the name.
; CHECK-LABEL: @testloopvariant(
; CHECK: call void @llvm.lifetime.end
; CHECK: store <2 x i32>
; CHECK: call void @llvm.lifetime.start
define void @testloopvariant(i32 *%d) {
entry:
; 1024 x i32 is 4096 bytes, which is the size passed to each marker.
%arr = alloca [1024 x i32], align 16
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; NOTE(review): %0 is the only value here derived from both %arr and the
; induction variable, but nothing uses it. The markers below take %1, which
; is a cast of %arr itself. If the intent was to pass a loop-variant pointer
; to the markers, %1 would need to be derived from %0 - confirm with the
; author before changing, since that alters what the vectorizer sees.
%0 = getelementptr [1024 x i32]* %arr, i32 0, i64 %indvars.iv
%1 = bitcast [1024 x i32]* %arr to i8*
call void @llvm.lifetime.end(i64 4096, i8* %1) #1
%arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
; The loaded value is not used by any later instruction.
%2 = load i32* %arrayidx, align 8
; This store to consecutive elements of %d is the access the test expects to
; be widened.
store i32 100, i32* %arrayidx, align 8
call void @llvm.lifetime.start(i64 4096, i8* %1) #1
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
; The loop exits once the incremented counter equals 128, i.e. after 128
; iterations.
%exitcond = icmp ne i32 %lftr.wideiv, 128
br i1 %exitcond, label %for.body, label %for.end
for.end:
ret void
}
declare void @llvm.lifetime.start(i64, i8* nocapture) #1
declare void @llvm.lifetime.end(i64, i8* nocapture) #1