From 9cdfd44521e035ccc571b37d6b48fe391b00961e Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 16 Feb 2009 00:44:23 +0000 Subject: [PATCH] Change these tests to use regular loads instead of llvm.x86.sse2.loadu.dq. Enhance instcombine to use the preferred field of GetOrEnforceKnownAlignment in more cases, so that regular IR operations are optimized in the same way that the intrinsics currently are. llvm-svn: 64623 --- .../Scalar/InstructionCombining.cpp | 8 ++++--- .../test/CodeGen/X86/2006-05-11-InstrSched.ll | 10 ++++----- .../CodeGen/X86/iv-users-in-other-loops.ll | 22 +++++++++---------- llvm/test/Transforms/InstCombine/align-inc.ll | 5 +---- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/InstructionCombining.cpp b/llvm/lib/Transforms/Scalar/InstructionCombining.cpp index 7117bda70675..76b55663004a 100644 --- a/llvm/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/llvm/lib/Transforms/Scalar/InstructionCombining.cpp @@ -9275,7 +9275,7 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); + unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2), DstAlign); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment()->getZExtValue(); @@ -11097,7 +11097,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); // Attempt to improve the alignment. - unsigned KnownAlign = GetOrEnforceKnownAlignment(Op); + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); if (KnownAlign > (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : LI.getAlignment())) @@ -11376,7 +11377,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } // Attempt to improve the alignment. - unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr); + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); if (KnownAlign > (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : SI.getAlignment())) diff --git a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll index 774e7243fd37..6c0e76b34ade 100644 --- a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -14,8 +14,8 @@ cond_true: ; preds = %cond_true, %entry %k.0.0 = bitcast i32 %tmp.10 to i32 ; [#uses=2] %tmp31 = add i32 %k.0.0, -1 ; [#uses=4] %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; [#uses=1] - %tmp34 = bitcast i32* %tmp32 to i8* ; [#uses=1] - %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp34 ) ; <<16 x i8>> [#uses=1] + %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; [#uses=1] + %tmp = load <16 x i8>* %tmp34, align 1 %tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; [#uses=1] %tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1] %tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1] @@ -23,8 +23,8 @@ cond_true: ; preds = %cond_true, %entry %tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2] %tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1] %tmp62 = getelementptr i32* %ip, i32 %tmp31 ; [#uses=1] - %tmp65 = bitcast i32* %tmp62 to i8* ; [#uses=1] - %tmp66 = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp65 ) ; <<16 x i8>> [#uses=1] + %tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; [#uses=1] + %tmp66 = load <16 x i8>* %tmp65, align 1 %tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; [#uses=1] %tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1] %tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1] @@ -50,6 +50,4 @@ return: ; preds = %cond_true, %entry ret void } -declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) - declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) diff --git a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll index 175f4c0c6a3f..e8762bc96895 100644 --- a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -160,23 +160,23 @@ bb9: ; preds = %bb9, %bb10.preheader %B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec ; [#uses=2] %B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum ; [#uses=1] %A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum ; [#uses=1] - %61 = bitcast float* %B_addr.438 to i8* ; [#uses=1] - %62 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %61) nounwind readonly ; <<4 x float>> [#uses=1] + %61 = bitcast float* %B_addr.438 to <4 x float>* ; [#uses=1] + %62 = load <4 x float>* %61, align 1 %B_addr.438.sum169 = or i64 %A_addr.440.rec, 4 ; [#uses=1] %B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169 ; [#uses=2] %63 = getelementptr float* %B, i64 %B_addr.0.sum187 ; [#uses=1] - %64 = bitcast float* %63 to i8* ; [#uses=1] - %65 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %64) nounwind readonly ; <<4 x float>> [#uses=1] + %64 = bitcast float* %63 to <4 x float>* ; [#uses=1] + %65 = load <4 x float>* %64, align 1 %B_addr.438.sum168 = or i64 %A_addr.440.rec, 8 ; [#uses=1] %B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168 ; [#uses=2] %66 = getelementptr float* %B, i64 %B_addr.0.sum186 ; [#uses=1] - %67 = bitcast float* %66 to i8* ; [#uses=1] - %68 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %67) nounwind readonly ; <<4 x float>> [#uses=1] + %67 = bitcast float* %66 to <4 x float>* ; [#uses=1] + %68 = load <4 x float>* %67, align 1 %B_addr.438.sum167 = or i64 %A_addr.440.rec, 12 ; [#uses=1] %B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167 ; [#uses=2] %69 = getelementptr float* %B, i64 %B_addr.0.sum185 ; [#uses=1] - %70 = bitcast float* %69 to i8* ; [#uses=1] - %71 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %70) nounwind readonly ; <<4 x float>> [#uses=1] + %70 = bitcast float* %69 to <4 x float>* ; [#uses=1] + %71 = load <4 x float>* %70, align 1 %72 = bitcast float* %A_addr.440 to <4 x float>* ; <<4 x float>*> [#uses=1] %73 = load <4 x float>* %72, align 16 ; <<4 x float>> [#uses=1] %74 = mul <4 x float> %73, %62 ; <<4 x float>> [#uses=1] @@ -214,8 +214,8 @@ bb11: ; preds = %bb11, %bb12.loopexit %A_addr.529.rec = shl i64 %indvar, 2 ; [#uses=3] %B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec ; [#uses=1] %A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec ; [#uses=1] - %95 = bitcast float* %B_addr.527 to i8* ; [#uses=1] - %96 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %95) nounwind readonly ; <<4 x float>> [#uses=1] + %95 = bitcast float* %B_addr.527 to <4 x float>* ; [#uses=1] + %96 = load <4 x float>* %95, align 1 %97 = bitcast float* %A_addr.529 to <4 x float>* ; <<4 x float>*> [#uses=1] %98 = load <4 x float>* %97, align 16 ; <<4 x float>> [#uses=1] %99 = mul <4 x float> %98, %96 ; <<4 x float>> [#uses=1] @@ -288,5 +288,3 @@ bb16: ; preds = %bb14, %bb13 store float %Sum0.2.lcssa, float* %C, align 4 ret void } - -declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly diff --git a/llvm/test/Transforms/InstCombine/align-inc.ll b/llvm/test/Transforms/InstCombine/align-inc.ll index 0ad01cb23498..104d9918a9c7 100644 --- a/llvm/test/Transforms/InstCombine/align-inc.ll +++ b/llvm/test/Transforms/InstCombine/align-inc.ll @@ -3,12 +3,9 @@ @GLOBAL = internal global [4 x i32] zeroinitializer -declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) - - define <16 x i8> @foo(<2 x i64> %x) { entry: - %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* bitcast ([4 x i32]* @GLOBAL to i8*) ) + %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1 ret <16 x i8> %tmp }