X86: Prefer using VPSHUFD over VPERMILPS for v4f32 shuffles because it has better throughput.

llvm-svn: 169624
This commit is contained in:
Nadav Rotem 2012-12-07 19:01:13 +00:00
parent 889037d754
commit 481e50efe0
4 changed files with 9 additions and 8 deletions

View File

@ -6781,12 +6781,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
unsigned TargetMask = getShuffleSHUFImmediate(SVOp); unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32)) if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG); return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
DAG);
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1, return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG); TargetMask, DAG);
} }

View File

@ -2348,7 +2348,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
; CHECK: vpermilps ; CHECK: vpshufd
%res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res ret <4 x float> %res
} }

View File

@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
ret <4 x float> %b ret <4 x float> %b
; CHECK: test1: ; CHECK: test1:
; CHECK: vshufps ; CHECK: vshufps
; CHECK: vpermilps ; CHECK: vpshufd
} }
; rdar://10538417 ; rdar://10538417
@ -106,7 +106,7 @@ define <4 x float> @test11(<4 x float> %a) nounwind {
define <4 x float> @test12(<4 x float>* %a) nounwind { define <4 x float> @test12(<4 x float>* %a) nounwind {
; CHECK: test12 ; CHECK: test12
; CHECK: vpermilps $27, ( ; CHECK: vpshufd
%tmp0 = load <4 x float>* %a %tmp0 = load <4 x float>* %a
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1 ret <4 x float> %tmp1

View File

@ -84,7 +84,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
ret <8 x float> %tmp ret <8 x float> %tmp
} }
; CHECK: vpermilps $0 ; CHECK: vpshufd $0
; CHECK-NEXT: vinsertf128 $1 ; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry: entry:
@ -93,7 +93,7 @@ entry:
} }
; CHECK: vextractf128 $1 ; CHECK: vextractf128 $1
; CHECK-NEXT: vpermilps $85 ; CHECK-NEXT: vpshufd
; CHECK-NEXT: vinsertf128 $1 ; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry: entry: