From ea8d803bb088a35f84cb29e2b827776178214356 Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Thu, 8 Sep 2011 18:05:02 +0000
Subject: [PATCH] Fix PR10844: Add patterns to cover non-foldable versions of
 X86vzmovl, triggered using llc -O0. Also fix some SET0PS patterns to use
 their AVX forms and cover them in the testcase.

llvm-svn: 139304
---
 llvm/lib/Target/X86/X86InstrSSE.td      | 18 ++++++++++++++----
 llvm/test/CodeGen/X86/avx-load-store.ll | 25 +++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c210a987dc54..9148c76ce0c3 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -536,15 +536,15 @@ let Predicates = [HasAVX] in {
   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
   // MOVS{S,D} to the lower bits.
   def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-            (VMOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
+            (VMOVSSrr (v4f32 (AVX_SET0PS)), FR32:$src)>;
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-            (VMOVSSrr (v4f32 (V_SET0PS)),
+            (VMOVSSrr (v4f32 (AVX_SET0PS)),
                       (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (VMOVSSrr (v4i32 (V_SET0PI)),
+            (VMOVSSrr (v4i32 (AVX_SET0PI)),
                       (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-            (VMOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
+            (VMOVSDrr (v2f64 (AVX_SET0PS)), FR64:$src)>;
 }

 let AddedComplexity = 20 in {
@@ -579,6 +579,16 @@ let Predicates = [HasAVX] in {
                    (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
               (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
   }
+  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+                   (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
+            (SUBREG_TO_REG (i32 0),
+             (v4f32 (VMOVSSrr (v4f32 (AVX_SET0PS)), FR32:$src)),
+             sub_xmm)>;
+  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+                   (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
+            (SUBREG_TO_REG (i64 0),
+             (v2f64 (VMOVSDrr (v2f64 (AVX_SET0PS)), FR64:$src)),
+             sub_xmm)>;

   // Extract and store.
   def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index f70291b96e16..1fda9bc22923 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc -O0 < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -check-prefix=CHECK_O0

 ; CHECK: vmovaps
 ; CHECK: vmovaps
@@ -78,3 +79,27 @@ entry:
   ret void
 }

+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+
+; CHECK_O0: _f_f
+; CHECK_O0: vmovss LCPI
+; CHECK_O0: vxorps %xmm
+; CHECK_O0: vmovss %xmm
+define void @f_f() nounwind {
+allocas:
+  br i1 undef, label %cif_mask_all, label %cif_mask_mixed
+
+cif_mask_all:                                     ; preds = %allocas
+  unreachable
+
+cif_mask_mixed:                                   ; preds = %allocas
+  br i1 undef, label %cif_mixed_test_all, label %cif_mixed_test_any_check
+
+cif_mixed_test_all:                               ; preds = %cif_mask_mixed
+  call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x float> <float 0xFFFFFFFFE0000000, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, <8 x float> undef) nounwind
+  unreachable
+
+cif_mixed_test_any_check:                         ; preds = %cif_mask_mixed
+  unreachable
+}
+
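
For context, a minimal standalone reproducer sketch for the dag shape the new
v8f32 pattern covers (this snippet and the function name @vzmovl_v8f32 are
hypothetical, distilled from the mask materialization in @f_f above; they are
not part of the commit). Inserting a scalar into lane 0 of a zeroed 256-bit
vector should reach instruction selection as
(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector ...)), 0))),
which llc -O0 previously could not select:

; Hypothetical reproducer, assuming llc -O0 with +avx as in the RUN line above.
; The single defined lane arrives as scalar_to_vector + insert_subvector, which
; the newly added AVX_SET0PS/VMOVSSrr/SUBREG_TO_REG pattern is meant to select
; instead of leaving an unmatched X86vzmovl node.
define <8 x float> @vzmovl_v8f32(float %x) nounwind {
  %v = insertelement <8 x float> zeroinitializer, float %x, i32 0
  ret <8 x float> %v
}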