From 19446a07a71f44bb073634772bb36ff1ffa1ed67 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 5 Dec 2011 22:37:00 +0000 Subject: [PATCH] Make the MemCpyOptimizer a bit more aggressive. I can't think of a scenario where this would be bad as the backend shouldn't have a problem inlining small memcpys. rdar://10510150 llvm-svn: 145865 --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 2 +- llvm/test/Transforms/MemCpyOpt/form-memset.ll | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9e4f51f45493..6c6db60cc3eb 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -148,7 +148,7 @@ struct MemsetRange { bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { // If we found more than 8 stores to merge or 64 bytes, use memset. - if (TheStores.size() >= 8 || End-Start >= 64) return true; + if (TheStores.size() >= 4 || End-Start >= 16) return true; // If there is nothing to merge, don't do anything. if (TheStores.size() < 2) return false; diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll index 1ac97e9e6b91..e5ace3327029 100644 --- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll @@ -57,8 +57,8 @@ entry: declare i32 @bar(...) 
+%struct.MV = type { i16, i16 } - %struct.MV = type { i16, i16 } define void @test2() nounwind { entry: @@ -220,3 +220,19 @@ entry: ; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false) } +; More aggressive heuristic +; rdar://9892684 +define void @test7(i32* nocapture %c) nounwind optsize { + store i32 -1, i32* %c, align 4 + %1 = getelementptr inbounds i32* %c, i32 1 + store i32 -1, i32* %1, align 4 + %2 = getelementptr inbounds i32* %c, i32 2 + store i32 -1, i32* %2, align 4 + %3 = getelementptr inbounds i32* %c, i32 3 + store i32 -1, i32* %3, align 4 + %4 = getelementptr inbounds i32* %c, i32 4 + store i32 -1, i32* %4, align 4 +; CHECK: @test7 +; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false) + ret void +}