Teach the loop vectorizer to cope with llvm.lifetime.start / llvm.lifetime.end
calls inside a loop body.

  * BasicTTI::getIntrinsicInstrCost returns a cost of 0 for both markers.
  * getIntrinsicIDForCall reports both markers as recognised intrinsics
    instead of Intrinsic::not_intrinsic.
  * InnerLoopVectorizer::vectorizeBlockInLoop hands the markers to
    scalarizeInstruction; every other recognised intrinsic is still widened
    by building one vector call per unroll part.
  * New test llvm/test/Transforms/LoopVectorize/lifetime.ll checks that the
    markers are still emitted around the widened <2 x i32> store.

NOTE(review): this patch text was rebuilt from a copy whose line breaks and
  template arguments had been stripped. Indentation is reconstructed and the
  column alignment inside context lines (the one-line `case` statements in
  BasicTargetTransformInfo.cpp) is not restored; verify against the tree, or
  apply with whitespace ignored.
NOTE(review): `SmallVector<Value *, 4>` is reconstructed. The element type
  follows from Args.push_back(Arg[Part]) and Builder.CreateCall(F, Args); the
  inline size 4 is an assumption - confirm against the tree.
NOTE(review): in @testloopvariant the only loop-variant address, %0, is never
  used; both markers take %1, a bitcast of %arr. The test therefore looks like
  it does not exercise a loop-variant pointer - confirm the intent.
NOTE(review): attribute group #1 is referenced by the calls and declarations
  in lifetime.ll, but no `attributes #1 = { ... }` line is part of this patch.

diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index cb12a40ff0fa..b48b81767ee4 100644
--- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -452,6 +452,9 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   case Intrinsic::pow: ISD = ISD::FPOW; break;
   case Intrinsic::fma: ISD = ISD::FMA; break;
   case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+  case Intrinsic::lifetime_start:
+  case Intrinsic::lifetime_end:
+    return 0;
   }
 
   const TargetLoweringBase *TLI = getTLI();
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b93d9a065ace..a62fedc43d16 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1775,6 +1775,8 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
     case Intrinsic::pow:
     case Intrinsic::fma:
     case Intrinsic::fmuladd:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
       return II->getIntrinsicID();
     default:
       return Intrinsic::not_intrinsic;
@@ -2491,15 +2493,23 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
       CallInst *CI = cast<CallInst>(it);
       Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
       assert(ID && "Not an intrinsic call!");
-      for (unsigned Part = 0; Part < UF; ++Part) {
-        SmallVector<Value *, 4> Args;
-        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
-          VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
-          Args.push_back(Arg[Part]);
+      switch (ID) {
+      case Intrinsic::lifetime_end:
+      case Intrinsic::lifetime_start:
+        scalarizeInstruction(it);
+        break;
+      default:
+        for (unsigned Part = 0; Part < UF; ++Part) {
+          SmallVector<Value *, 4> Args;
+          for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+            VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
+            Args.push_back(Arg[Part]);
+          }
+          Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
+          Function *F = Intrinsic::getDeclaration(M, ID, Tys);
+          Entry[Part] = Builder.CreateCall(F, Args);
         }
-        Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
-        Function *F = Intrinsic::getDeclaration(M, ID, Tys);
-        Entry[Part] = Builder.CreateCall(F, Args);
+        break;
       }
       break;
     }
diff --git a/llvm/test/Transforms/LoopVectorize/lifetime.ll b/llvm/test/Transforms/LoopVectorize/lifetime.ll
new file mode 100644
index 000000000000..87006ed0651c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/lifetime.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we can vectorize loops which contain lifetime markers.
+
+; CHECK-LABEL: test
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @test(i32 *%d) {
+entry:
+  %arr = alloca [1024 x i32], align 16
+  %0 = bitcast [1024 x i32]* %arr to i8*
+  call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  call void @llvm.lifetime.end(i64 4096, i8* %0) #1
+  %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 8
+  store i32 100, i32* %arrayidx, align 8
+  call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  call void @llvm.lifetime.end(i64 4096, i8* %0) #1
+  ret void
+}
+
+; CHECK-LABEL: testbitcast
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @testbitcast(i32 *%d) {
+entry:
+  %arr = alloca [1024 x i32], align 16
+  %0 = bitcast [1024 x i32]* %arr to i8*
+  call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %1 = bitcast [1024 x i32]* %arr to i8*
+  call void @llvm.lifetime.end(i64 4096, i8* %1) #1
+  %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+  %2 = load i32* %arrayidx, align 8
+  store i32 100, i32* %arrayidx, align 8
+  call void @llvm.lifetime.start(i64 4096, i8* %1) #1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  call void @llvm.lifetime.end(i64 4096, i8* %0) #1
+  ret void
+}
+
+; CHECK-LABEL: testloopvariant
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @testloopvariant(i32 *%d) {
+entry:
+  %arr = alloca [1024 x i32], align 16
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = getelementptr [1024 x i32]* %arr, i32 0, i64 %indvars.iv
+  %1 = bitcast [1024 x i32]* %arr to i8*
+  call void @llvm.lifetime.end(i64 4096, i8* %1) #1
+  %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+  %2 = load i32* %arrayidx, align 8
+  store i32 100, i32* %arrayidx, align 8
+  call void @llvm.lifetime.start(i64 4096, i8* %1) #1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1