AVX512 : Implemented encoding and DAG lowering for VMOVHPS/PD and VMOVLPS/PD instructions.
Differential Revision: http://reviews.llvm.org/D14492 llvm-svn: 252592
This commit is contained in:
parent
649a607e11
commit
b6b27af46a
|
@ -4309,6 +4309,115 @@ let Predicates = [HasAVX512] in {
|
|||
(VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VMOVHPS/PD VMOVLPS/PD Instructions
|
||||
// All patterns were taken from the SSE implementation.
|
||||
//===----------------------------------------------------------------------===//
|
||||
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
let mayLoad = 1 in
|
||||
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set _.RC:$dst,
|
||||
(OpNode _.RC:$src1,
|
||||
(_.VT (bitconvert
|
||||
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
|
||||
IIC_SSE_MOV_LH>, EVEX_4V;
|
||||
}
|
||||
|
||||
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
|
||||
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
|
||||
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
|
||||
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
|
||||
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
|
||||
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
|
||||
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
|
||||
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
// VMOVHPS patterns
|
||||
def : Pat<(X86Movlhps VR128X:$src1,
|
||||
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
|
||||
(VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
|
||||
def : Pat<(X86Movlhps VR128X:$src1,
|
||||
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
||||
(VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
|
||||
// VMOVHPD patterns
|
||||
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)))),
|
||||
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
|
||||
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
|
||||
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
|
||||
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
|
||||
// VMOVLPS patterns
|
||||
def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
|
||||
(VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
|
||||
(VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
|
||||
// VMOVLPD patterns
|
||||
def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
|
||||
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
|
||||
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
|
||||
def : Pat<(v2f64 (X86Movsd VR128X:$src1,
|
||||
(v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
||||
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
let mayStore = 1 in {
|
||||
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
|
||||
(ins f64mem:$dst, VR128X:$src),
|
||||
"vmovhps\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract
|
||||
(X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
|
||||
(bc_v2f64 (v4f32 VR128X:$src))),
|
||||
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
|
||||
EVEX, EVEX_CD8<32, CD8VT2>;
|
||||
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
|
||||
(ins f64mem:$dst, VR128X:$src),
|
||||
"vmovhpd\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract
|
||||
(v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
|
||||
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
|
||||
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
|
||||
(ins f64mem:$dst, VR128X:$src),
|
||||
"vmovlps\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)),
|
||||
(iPTR 0))), addr:$dst)],
|
||||
IIC_SSE_MOV_LH>,
|
||||
EVEX, EVEX_CD8<32, CD8VT2>;
|
||||
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
|
||||
(ins f64mem:$dst, VR128X:$src),
|
||||
"vmovlpd\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract (v2f64 VR128X:$src),
|
||||
(iPTR 0))), addr:$dst)],
|
||||
IIC_SSE_MOV_LH>,
|
||||
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
// VMOVHPD patterns
|
||||
def : Pat<(store (f64 (vector_extract
|
||||
(v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
|
||||
(iPTR 0))), addr:$dst),
|
||||
(VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
|
||||
// VMOVLPS patterns
|
||||
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
|
||||
addr:$src1),
|
||||
(VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
|
||||
def : Pat<(store (v4i32 (X86Movlps
|
||||
(bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1),
|
||||
(VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
|
||||
// VMOVLPD patterns
|
||||
def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
|
||||
addr:$src1),
|
||||
(VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
|
||||
def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
|
||||
addr:$src1),
|
||||
(VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FMA - Fused Multiply Operations
|
||||
//
|
||||
|
|
|
@ -1172,12 +1172,13 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
|
|||
|
||||
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
|
||||
string base_opc, InstrItinClass itin> {
|
||||
defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
|
||||
let Predicates = [UseAVX] in
|
||||
defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
itin>, VEX_4V;
|
||||
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
|
||||
"\t{$src2, $dst|$dst, $src2}",
|
||||
itin>;
|
||||
}
|
||||
|
@ -1188,6 +1189,7 @@ let AddedComplexity = 20 in {
|
|||
}
|
||||
|
||||
let SchedRW = [WriteStore] in {
|
||||
let Predicates = [UseAVX] in {
|
||||
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
"movlps\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
|
||||
|
@ -1198,6 +1200,7 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
|||
[(store (f64 (vector_extract (v2f64 VR128:$src),
|
||||
(iPTR 0))), addr:$dst)],
|
||||
IIC_SSE_MOV_LH>, VEX;
|
||||
}// UseAVX
|
||||
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
"movlps\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
|
||||
|
@ -1210,7 +1213,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
|||
IIC_SSE_MOV_LH>;
|
||||
} // SchedRW
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [UseAVX] in {
|
||||
// Shuffle with VMOVLPS
|
||||
def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
|
||||
(VMOVLPSrm VR128:$src1, addr:$src2)>;
|
||||
|
@ -1297,6 +1300,7 @@ let AddedComplexity = 20 in {
|
|||
let SchedRW = [WriteStore] in {
|
||||
// v2f64 extract element 1 is always custom lowered to unpack high to low
|
||||
// and extract element 0 so the non-store version isn't too horrible.
|
||||
let Predicates = [UseAVX] in {
|
||||
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
"movhps\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract
|
||||
|
@ -1308,6 +1312,7 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
|||
[(store (f64 (vector_extract
|
||||
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
|
||||
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
|
||||
} // UseAVX
|
||||
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
"movhps\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract
|
||||
|
@ -1321,7 +1326,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
|||
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
|
||||
} // SchedRW
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [UseAVX] in {
|
||||
// VMOVHPS patterns
|
||||
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
|
||||
|
|
|
@ -344,3 +344,41 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
|
|||
ret <16 x i16> %shuffle
|
||||
}
|
||||
|
||||
define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
|
||||
%a = load double, double* %ptr
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
|
||||
%a = load double, double* %ptr
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
define void @store_floats(<4 x float> %x, i64* %p) {
|
||||
%a = fadd <4 x float> %x, %x
|
||||
%b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%c = bitcast <2 x float> %b to i64
|
||||
store i64 %c, i64* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_double(<2 x double> %x, i64* %p) {
|
||||
%a = fadd <2 x double> %x, %x
|
||||
%b = extractelement <2 x double> %a, i32 0
|
||||
%c = bitcast double %b to i64
|
||||
store i64 %c, i64* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_h_double(<2 x double> %x, i64* %p) {
|
||||
%a = fadd <2 x double> %x, %x
|
||||
%b = extractelement <2 x double> %a, i32 1
|
||||
%c = bitcast double %b to i64
|
||||
store i64 %c, i64* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -66,3 +66,21 @@ define void @store_int(<4 x i32> %x, <2 x float>* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @store_h_double(<2 x double> %x, i64* %p) {
|
||||
; SSE-LABEL: store_h_double:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: addpd %xmm0, %xmm0
|
||||
; SSE-NEXT: movhpd %xmm0, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: store_h_double:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vaddpd %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovhpd %xmm0, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%a = fadd <2 x double> %x, %x
|
||||
%b = extractelement <2 x double> %a, i32 1
|
||||
%c = bitcast double %b to i64
|
||||
store i64 %c, i64* %p
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -18297,3 +18297,195 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
|
|||
// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff]
|
||||
vmovd %xmm5, -516(%rdx)
|
||||
|
||||
// CHECK: vmovlps (%rcx), %xmm20, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x39]
|
||||
vmovlps (%rcx), %xmm20, %xmm7
|
||||
|
||||
// CHECK: vmovlps 291(%rax,%r14,8), %xmm20, %xmm7
|
||||
// CHECK: encoding: [0x62,0xb1,0x5c,0x00,0x12,0xbc,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovlps 291(%rax,%r14,8), %xmm20, %xmm7
|
||||
|
||||
// CHECK: vmovlps 1016(%rdx), %xmm20, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x7f]
|
||||
vmovlps 1016(%rdx), %xmm20, %xmm7
|
||||
|
||||
// CHECK: vmovlps 1024(%rdx), %xmm20, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0x00,0x04,0x00,0x00]
|
||||
vmovlps 1024(%rdx), %xmm20, %xmm7
|
||||
|
||||
// CHECK: vmovlps -1024(%rdx), %xmm20, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x80]
|
||||
vmovlps -1024(%rdx), %xmm20, %xmm7
|
||||
|
||||
// CHECK: vmovlps -1032(%rdx), %xmm20, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0xf8,0xfb,0xff,0xff]
|
||||
vmovlps -1032(%rdx), %xmm20, %xmm7
|
||||
|
||||
// CHECK: vmovlps %xmm27, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x19]
|
||||
vmovlps %xmm27, (%rcx)
|
||||
|
||||
// CHECK: vmovlps %xmm27, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x13,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovlps %xmm27, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovlps %xmm27, 1016(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x7f]
|
||||
vmovlps %xmm27, 1016(%rdx)
|
||||
|
||||
// CHECK: vmovlps %xmm27, 1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0x00,0x04,0x00,0x00]
|
||||
vmovlps %xmm27, 1024(%rdx)
|
||||
|
||||
// CHECK: vmovlps %xmm27, -1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x80]
|
||||
vmovlps %xmm27, -1024(%rdx)
|
||||
|
||||
// CHECK: vmovlps %xmm27, -1032(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0xf8,0xfb,0xff,0xff]
|
||||
vmovlps %xmm27, -1032(%rdx)
|
||||
|
||||
// CHECK: vmovlpd (%rcx), %xmm6, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x29]
|
||||
vmovlpd (%rcx), %xmm6, %xmm29
|
||||
|
||||
// CHECK: vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29
|
||||
// CHECK: encoding: [0x62,0x21,0xcd,0x08,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29
|
||||
|
||||
// CHECK: vmovlpd 1016(%rdx), %xmm6, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x7f]
|
||||
vmovlpd 1016(%rdx), %xmm6, %xmm29
|
||||
|
||||
// CHECK: vmovlpd 1024(%rdx), %xmm6, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0x00,0x04,0x00,0x00]
|
||||
vmovlpd 1024(%rdx), %xmm6, %xmm29
|
||||
|
||||
// CHECK: vmovlpd -1024(%rdx), %xmm6, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x80]
|
||||
vmovlpd -1024(%rdx), %xmm6, %xmm29
|
||||
|
||||
// CHECK: vmovlpd -1032(%rdx), %xmm6, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0xf8,0xfb,0xff,0xff]
|
||||
vmovlpd -1032(%rdx), %xmm6, %xmm29
|
||||
|
||||
// CHECK: vmovlpd %xmm25, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x09]
|
||||
vmovlpd %xmm25, (%rcx)
|
||||
|
||||
// CHECK: vmovlpd %xmm25, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x13,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovlpd %xmm25, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovlpd %xmm25, 1016(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x7f]
|
||||
vmovlpd %xmm25, 1016(%rdx)
|
||||
|
||||
// CHECK: vmovlpd %xmm25, 1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0x00,0x04,0x00,0x00]
|
||||
vmovlpd %xmm25, 1024(%rdx)
|
||||
|
||||
// CHECK: vmovlpd %xmm25, -1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x80]
|
||||
vmovlpd %xmm25, -1024(%rdx)
|
||||
|
||||
// CHECK: vmovlpd %xmm25, -1032(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0xf8,0xfb,0xff,0xff]
|
||||
vmovlpd %xmm25, -1032(%rdx)
|
||||
|
||||
// CHECK: vmovhps (%rcx), %xmm17, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x21]
|
||||
vmovhps (%rcx), %xmm17, %xmm20
|
||||
|
||||
// CHECK: vmovhps 291(%rax,%r14,8), %xmm17, %xmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0x74,0x00,0x16,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovhps 291(%rax,%r14,8), %xmm17, %xmm20
|
||||
|
||||
// CHECK: vmovhps 1016(%rdx), %xmm17, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x7f]
|
||||
vmovhps 1016(%rdx), %xmm17, %xmm20
|
||||
|
||||
// CHECK: vmovhps 1024(%rdx), %xmm17, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0x00,0x04,0x00,0x00]
|
||||
vmovhps 1024(%rdx), %xmm17, %xmm20
|
||||
|
||||
// CHECK: vmovhps -1024(%rdx), %xmm17, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x80]
|
||||
vmovhps -1024(%rdx), %xmm17, %xmm20
|
||||
|
||||
// CHECK: vmovhps -1032(%rdx), %xmm17, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0xf8,0xfb,0xff,0xff]
|
||||
vmovhps -1032(%rdx), %xmm17, %xmm20
|
||||
|
||||
// CHECK: vmovhps %xmm18, (%rcx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x11]
|
||||
vmovhps %xmm18, (%rcx)
|
||||
|
||||
// CHECK: vmovhps %xmm18, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x17,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovhps %xmm18, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovhps %xmm18, 1016(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x7f]
|
||||
vmovhps %xmm18, 1016(%rdx)
|
||||
|
||||
// CHECK: vmovhps %xmm18, 1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0x00,0x04,0x00,0x00]
|
||||
vmovhps %xmm18, 1024(%rdx)
|
||||
|
||||
// CHECK: vmovhps %xmm18, -1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x80]
|
||||
vmovhps %xmm18, -1024(%rdx)
|
||||
|
||||
// CHECK: vmovhps %xmm18, -1032(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0xf8,0xfb,0xff,0xff]
|
||||
vmovhps %xmm18, -1032(%rdx)
|
||||
|
||||
// CHECK: vmovhpd (%rcx), %xmm28, %xmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x19]
|
||||
vmovhpd (%rcx), %xmm28, %xmm19
|
||||
|
||||
// CHECK: vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19
|
||||
// CHECK: encoding: [0x62,0xa1,0x9d,0x00,0x16,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19
|
||||
|
||||
// CHECK: vmovhpd 1016(%rdx), %xmm28, %xmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x7f]
|
||||
vmovhpd 1016(%rdx), %xmm28, %xmm19
|
||||
|
||||
// CHECK: vmovhpd 1024(%rdx), %xmm28, %xmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0x00,0x04,0x00,0x00]
|
||||
vmovhpd 1024(%rdx), %xmm28, %xmm19
|
||||
|
||||
// CHECK: vmovhpd -1024(%rdx), %xmm28, %xmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x80]
|
||||
vmovhpd -1024(%rdx), %xmm28, %xmm19
|
||||
|
||||
// CHECK: vmovhpd -1032(%rdx), %xmm28, %xmm19
|
||||
// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0xf8,0xfb,0xff,0xff]
|
||||
vmovhpd -1032(%rdx), %xmm28, %xmm19
|
||||
|
||||
// CHECK: vmovhpd %xmm25, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x09]
|
||||
vmovhpd %xmm25, (%rcx)
|
||||
|
||||
// CHECK: vmovhpd %xmm25, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x17,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovhpd %xmm25, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovhpd %xmm25, 1016(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x7f]
|
||||
vmovhpd %xmm25, 1016(%rdx)
|
||||
|
||||
// CHECK: vmovhpd %xmm25, 1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0x00,0x04,0x00,0x00]
|
||||
vmovhpd %xmm25, 1024(%rdx)
|
||||
|
||||
// CHECK: vmovhpd %xmm25, -1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x80]
|
||||
vmovhpd %xmm25, -1024(%rdx)
|
||||
|
||||
// CHECK: vmovhpd %xmm25, -1032(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0xf8,0xfb,0xff,0xff]
|
||||
vmovhpd %xmm25, -1032(%rdx)
|
||||
|
||||
|
|
Loading…
Reference in New Issue