[X86] Enable sse2_cvtsd2ss intrinsic to use an EVEX encoded instruction.

llvm-svn: 355810
This commit is contained in:
Craig Topper 2019-03-11 06:01:04 +00:00
parent f1e7482e69
commit 00afa193f1
4 changed files with 87 additions and 43 deletions

View File

@ -1215,28 +1215,28 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
(v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
XD, VEX_4V, VEX_WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS]>;
// VCVTSD2SSrm_Int: memory-source intrinsic form of vcvtsd2ss (opcode 0x5A,
// MRMSrcMem). Inputs are a VR128 register ($src1) and an sdmem operand
// ($src2); the result goes to a separate VR128 $dst, so the asm string names
// three operands. The string holds two operand orderings separated by '|'
// (presumably the AT&T and Intel syntax variants).
//
// NOTE(review): two pattern/predicate groups appear back to back in this
// listing. It looks like a diff rendered without its +/- markers: the
// int_x86_sse2_cvtsd2ss pattern with Requires<[HasAVX]> is presumably the
// removed text, and the X86frounds pattern with Requires<[UseAVX]> the added
// text. Confirm against the real .td file before reusing this snippet; as
// written here it is not a single valid definition.
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
// Pattern group 1 (presumably pre-change): matches the intrinsic directly.
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
// Pattern group 2 (presumably post-change): matches the X86frounds node,
// producing v4f32 from $src1 and the sse_load_f64 memory operand.
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>,
XD, VEX_4V, VEX_WIG, Requires<[UseAVX]>,
// Scheduling uses the folded-load variant of WriteCvtSD2SS plus its
// ReadAfterFold entry.
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
// Intrinsic forms of cvtsd2ss gated on UseSSE2 (no VEX flags on these defs).
// The "$src1 = $dst" constraint ties the first source to the destination,
// which is why the asm strings below name only $src2 and $dst.
//
// NOTE(review): in both defs the int_x86_sse2_cvtsd2ss pattern is immediately
// followed by an X86frounds pattern. This looks like a diff rendered without
// its +/- markers (the intrinsic pattern presumably removed, the X86frounds
// pattern presumably added). Confirm against the real .td file; as written
// here neither def is a single valid definition.
let Constraints = "$src1 = $dst" in {
// Register-source form: $src2 is a VR128 register (MRMSrcReg).
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
// Presumably the pre-change pattern line (direct intrinsic match).
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
// Presumably the post-change pattern line ($src2 viewed as v2f64, result v4f32).
(v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
// Memory-source form: $src2 is an sdmem operand (MRMSrcMem), matched through
// sse_load_f64 and scheduled with the folded-load WriteCvtSD2SS entries.
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
// Presumably the pre-change pattern (direct intrinsic match).
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
// Presumably the post-change pattern (X86frounds node).
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>,
XD, Requires<[UseSSE2]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}

View File

@ -1017,6 +1017,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtsd2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtsd2ss, INTR_TYPE_2OP, X86ISD::VFPROUNDS, 0),
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),

View File

@ -1553,10 +1553,15 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cvtsd_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
; AVX1-LABEL: test_mm_cvtsd_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
ret <4 x float> %res
}
@ -1569,21 +1574,32 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
; X86-AVX1-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
; X64-AVX1-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
ret <4 x float> %res

View File

@ -423,10 +423,15 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse2_cvtsd2ss:
; AVX: ## %bb.0:
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
; AVX1-LABEL: test_x86_sse2_cvtsd2ss:
; AVX1: ## %bb.0:
; AVX1-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtsd2ss:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@ -440,21 +445,32 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %
; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX-NEXT: retl ## encoding: [0xc3]
; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX-NEXT: retq ## encoding: [0xc3]
; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@ -468,21 +484,32 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, <2 x do
; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX-NEXT: retl ## encoding: [0xc3]
; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX-NEXT: retq ## encoding: [0xc3]
; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res