The PMOVZXWD family of functions had patterns extends narrow vector types to wide vector types.

It had patterns for zext-loading and extending. This commit adds patterns for loading a wide type, performing a bitcast,
and extending. This is an odd pattern, but it is commonly used when writing code with intrinsics.

rdar://11897677

llvm-svn: 163995
This commit is contained in:
Nadav Rotem 2012-09-16 07:39:07 +00:00
parent db90859400
commit 37521aa89c
2 changed files with 46 additions and 0 deletions

View File

@ -5576,31 +5576,43 @@ let Predicates = [HasAVX] in {
(VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
(VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
(VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
(VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
(VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
(VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
(VPMOVZXDQrm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@ -5609,31 +5621,43 @@ let Predicates = [UseSSE41] in {
(PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
(PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
(PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
(PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
(PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
(PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
(PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
(PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
(PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
(PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
(PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
(PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
(PMOVZXDQrm addr:$src)>;
}
let Predicates = [HasAVX2] in {

View File

@ -0,0 +1,22 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
; rdar://11897677
;CHECK: intrin_pmov
;CHECK: pmovzxbw (%rsi), %xmm0
;CHECK-NEXT: movdqu
;CHECK-NEXT: ret
define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
%1 = bitcast i8* %src to <2 x i64>*
%2 = load <2 x i64>* %1, align 16
%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
%5 = bitcast i16* %dest to i8*
%6 = bitcast <8 x i16> %4 to <16 x i8>
tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind
ret void
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind