diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index fa5297071e91..19ace642ef10 100644 --- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -443,6 +443,8 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::nearbyint: + ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll index e235a36222a7..8eeee8124d9a 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll @@ -30,3 +30,31 @@ for.end: ; preds = %vector.body } declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone + +define void @test2(float* nocapture %f) nounwind { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float* %f, i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load) + store <4 x float> %2, <4 x float>* %1, align 4 + %index.next = add i64 %index, 4 + %3 = icmp eq i64 %index.next, 1024 + br i1 %3, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + ret void + +; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2': +; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load) + +; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2': +; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load) + +} + +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone