AVX-512: all forms of SCATTER instruction on SKX, encoding, intrinsics and tests.

llvm-svn: 240936
Elena Demikhovsky 2015-06-29 12:14:24 +00:00
parent b065ff6caf
commit 30bc4ca313
7 changed files with 567 additions and 32 deletions


@@ -4634,6 +4634,102 @@ let TargetPrefix = "x86" in {
llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv2_df :
GCCBuiltin<"__builtin_ia32_scatterdiv2df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv2_di :
GCCBuiltin<"__builtin_ia32_scatterdiv2di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv4_df :
GCCBuiltin<"__builtin_ia32_scatterdiv4df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv4_di :
GCCBuiltin<"__builtin_ia32_scatterdiv4di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv4_sf :
GCCBuiltin<"__builtin_ia32_scatterdiv4sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv4_si :
GCCBuiltin<"__builtin_ia32_scatterdiv4si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv8_sf :
GCCBuiltin<"__builtin_ia32_scatterdiv8sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scatterdiv8_si :
GCCBuiltin<"__builtin_ia32_scatterdiv8si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv2_df :
GCCBuiltin<"__builtin_ia32_scattersiv2df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv2_di :
GCCBuiltin<"__builtin_ia32_scattersiv2di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv4_df :
GCCBuiltin<"__builtin_ia32_scattersiv4df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv4_di :
GCCBuiltin<"__builtin_ia32_scattersiv4di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv4_sf :
GCCBuiltin<"__builtin_ia32_scattersiv4sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv4_si :
GCCBuiltin<"__builtin_ia32_scattersiv4si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv8_sf :
GCCBuiltin<"__builtin_ia32_scattersiv8sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_scattersiv8_si :
GCCBuiltin<"__builtin_ia32_scattersiv8si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
[IntrReadWriteArgMem]>;
// gather prefetch
def int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
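The clang-side wrappers that reach these builtins are not part of this commit. As an illustration only, assuming the usual immintrin.h mapping in which _mm_i64scatter_pd and _mm_mask_i64scatter_pd expand to __builtin_ia32_scatterdiv2df, a 128-bit qword-indexed scatter of doubles would be written roughly like this:

// Illustration, not part of the commit: assumes the standard <immintrin.h>
// wrappers that are expected to lower to llvm.x86.avx512.scatterdiv2.df.
// Build: clang++ -O2 -mavx512f -mavx512vl scatter_demo.cpp (needs AVX-512VL hw).
#include <immintrin.h>
#include <cstdio>

int main() {
  double buf[8] = {};                            // scatter destination
  __m128i idx = _mm_set_epi64x(6, 1);            // element indices 1 and 6
  __m128d val = _mm_set_pd(60.0, 10.0);          // lane0 -> 10.0, lane1 -> 60.0

  // Unmasked form: all lanes are stored.
  _mm_i64scatter_pd(buf, idx, val, /*scale=*/8);

  // Masked form: only lanes whose mask bit is set are stored.
  _mm_mask_i64scatter_pd(buf, /*k=*/0x1, idx, val, /*scale=*/8);

  std::printf("%f %f\n", buf[1], buf[6]);        // 10.000000 60.000000
  return 0;
}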


@@ -15488,7 +15488,12 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
if (!C)
llvm_unreachable("Invalid scale type");
unsigned ScaleVal = C->getZExtValue();
if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
llvm_unreachable("Valid scale values are 1, 2, 4, 8");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15498,8 +15503,16 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
MaskInReg = DAG.getBitcast(MaskVT, Mask);
else {
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
}
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
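For the new 128- and 256-bit forms the mask covers only 2 or 4 lanes, so the mask operand is now bitcast to an i1 vector and its low subvector extracted rather than bitcast straight to MaskVT, and a non-constant or unexpected scale now hits llvm_unreachable (the valid values being 1, 2, 4 and 8). A minimal sketch of the user-visible behaviour this lowering has to preserve, again assuming the standard _mm_mask_i64scatter_pd wrapper (not part of this commit): only the low mask bits select lanes, and the scale is an immediate.

// Sketch of the mask semantics behind the v2i1/v4i1 handling above: for a
// 2-element scatter only mask bits 0 and 1 select lanes; higher bits of the
// i8 mask have no lane to control. Scale must be an immediate 1, 2, 4 or 8.
#include <immintrin.h>
#include <cstdio>

int main() {
  double buf[4] = {};
  __m128i idx = _mm_set_epi64x(3, 0);            // element indices 0 and 3
  __m128d val = _mm_set_pd(3.5, 0.5);            // lane0 -> 0.5, lane1 -> 3.5

  // 0xFE has bit 0 clear: lane 0 is suppressed, lane 1 is stored.
  _mm_mask_i64scatter_pd(buf, /*k=*/0xFE, idx, val, /*scale=*/8);

  std::printf("%f %f\n", buf[0], buf[3]);        // 0.000000 3.500000
  return 0;
}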


@@ -5596,40 +5596,58 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag ScatterNode> {
let mayStore = 1, Constraints = "$mask = $mask_wb" in
let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
(ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr,
!strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
_.KRCWM:$mask, vectoraddr:$dst))]>,
EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
let ExeDomain = SSEPackedDouble in {
defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
mscatterv8i32>, EVEX_V512, VEX_W;
defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
mscatterv8i64>, EVEX_V512, VEX_W;
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
vy32xmem, mscatterv8i32>, EVEX_V512, VEX_W;
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
vz64mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
vx32xmem, mscatterv4i32>, EVEX_V256, VEX_W;
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
vy64xmem, mscatterv4i64>, EVEX_V256, VEX_W;
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
vx32xmem, mscatterv4i32>, EVEX_V128, VEX_W;
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vx64xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
let ExeDomain = SSEPackedSingle in {
defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
mscatterv16i32>, EVEX_V512;
defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
mscatterv8i64>, EVEX_V512;
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz32mem,
mscatterv16i32>, EVEX_V512;
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz64mem,
mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
vy32xmem, mscatterv8i32>, EVEX_V256;
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vy64xmem, mscatterv4i64>, EVEX_V256;
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
vx32xmem, mscatterv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vx64xmem, mscatterv2i64>, EVEX_V128;
}
}
defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
mscatterv8i32>, EVEX_V512, VEX_W;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
mscatterv16i32>, EVEX_V512;
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
mscatterv8i64>, EVEX_V512, VEX_W;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
mscatterv8i64>, EVEX_V512;
defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
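The scatter patterns are now generated through avx512_scatter_q_pd and avx512_scatter_d_ps, which add EVEX_V128 and EVEX_V256 instantiations under HasVLX alongside the existing EVEX_V512 ones. As an illustrative sketch (the _mm256_mask_i32scatter_pd wrapper is an assumption, not defined in this commit), a 256-bit dword-indexed scatter of doubles, i.e. the shape the new vscatterdpd-on-ymm form exists for:

// Sketch: a 256-bit, dword-indexed scatter of doubles (the VSCATTERDPDZ256mr
// shape). Assumes the standard _mm256_mask_i32scatter_pd wrapper and
// AVX-512F + AVX-512VL hardware.
#include <immintrin.h>
#include <cstdio>

int main() {
  double buf[16] = {};
  __m128i idx = _mm_setr_epi32(0, 5, 10, 15);    // four 32-bit element indices
  __m256d val = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);

  _mm256_mask_i32scatter_pd(buf, /*k=*/0xF, idx, val, /*scale=*/8);

  std::printf("%f %f %f %f\n", buf[0], buf[5], buf[10], buf[15]);  // 1 2 3 4
  return 0;
}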


@@ -606,6 +606,30 @@ def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return false;
}]>;
def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_scatter node:$src1, node:$src2, node:$src3) , [{
if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
return (Sc->getIndex().getValueType() == MVT::v2i64 ||
Sc->getBasePtr().getValueType() == MVT::v2i64);
return false;
}]>;
def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_scatter node:$src1, node:$src2, node:$src3) , [{
if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
return (Sc->getIndex().getValueType() == MVT::v4i32 ||
Sc->getBasePtr().getValueType() == MVT::v4i32);
return false;
}]>;
def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_scatter node:$src1, node:$src2, node:$src3) , [{
if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
return (Sc->getIndex().getValueType() == MVT::v4i64 ||
Sc->getBasePtr().getValueType() == MVT::v4i64);
return false;
}]>;
def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_scatter node:$src1, node:$src2, node:$src3) , [{
if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
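These PatFrags key instruction selection purely on the index vector type, which is how the dword-indexed (scattersiv*) and qword-indexed (scatterdiv*) flavours are told apart. A small sketch contrasting the two index widths for the same <4 x i32> data, assuming the standard _mm_mask_i32scatter_epi32 and _mm_mask_i64scatter_epi32 wrappers:

// Sketch: the same 32-bit data scattered with 32-bit vs 64-bit indices,
// corresponding to the dword-index and qword-index PatFrags above.
// Assumes the usual immintrin.h wrapper names.
#include <immintrin.h>
#include <cstdio>

int main() {
  int buf[8] = {};
  __m128i val   = _mm_setr_epi32(1, 2, 3, 4);
  __m128i idx32 = _mm_setr_epi32(0, 2, 4, 6);    // <4 x i32> indices
  __m128i idx64 = _mm_set_epi64x(3, 1);          // <2 x i64> indices

  _mm_mask_i32scatter_epi32(buf, /*k=*/0xF, idx32, val, /*scale=*/4);  // dword index
  _mm_mask_i64scatter_epi32(buf, /*k=*/0x3, idx64, val, /*scale=*/4);  // qword index, low 2 lanes

  std::printf("%d %d %d %d %d\n", buf[0], buf[1], buf[2], buf[3], buf[4]);  // 1 1 2 2 3
  return 0;
}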


@@ -146,15 +146,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm),
X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH,
X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
X86::VSCATTERPF1DPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
X86::VSCATTERPF1DPSm),
X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm,
X86::VSCATTERPF1QPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
X86::VSCATTERPF1QPSm),
X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
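Each new entry pairs an intrinsic with the VL machine opcode it should select, e.g. avx512_scattersiv8_sf with VSCATTERDPSZ256mr. A hedged end-to-end sketch of a call whose IR form is llvm.x86.avx512.scattersiv8.sf, assuming the standard _mm256_mask_i32scatter_ps wrapper:

// Sketch: expected to reach llvm.x86.avx512.scattersiv8.sf and, per the table
// above, select VSCATTERDPSZ256mr. The wrapper name is an assumption; needs
// AVX-512F + AVX-512VL.
#include <immintrin.h>
#include <cstdio>

int main() {
  float buf[32] = {};
  __m256i idx = _mm256_setr_epi32(0, 4, 8, 12, 16, 20, 24, 28);
  __m256  val = _mm256_setr_ps(1, 2, 3, 4, 5, 6, 7, 8);

  _mm256_mask_i32scatter_ps(buf, /*k=*/0xFF, idx, val, /*scale=*/4);

  std::printf("%f %f\n", buf[0], buf[28]);       // 1.000000 8.000000
  return 0;
}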


@@ -548,3 +548,244 @@ define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)
define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)
define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)
define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)
define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)
define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)
define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)
define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)
define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 0)
call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)
define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)
define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)
define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)
define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)
define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)
define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)
define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
ret void
}
declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)
define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 0)
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}


@@ -1643,3 +1643,131 @@
// CHECK: vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x21,0x93,0x9c,0xb9,0x00,0x04,0x00,0x00]
vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}
// CHECK: vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterdd %xmm20, 256(%r9,%xmm31) {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa0,0x64,0x39,0x40]
vpscatterdd %xmm20, 256(%r9,%xmm31) {%k1}
// CHECK: vpscatterdd %xmm20, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00]
vpscatterdd %xmm20, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
// CHECK: vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
// CHECK: vpscatterdd %ymm28, 256(%r9,%ymm31) {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa0,0x64,0x39,0x40]
vpscatterdd %ymm28, 256(%r9,%ymm31) {%k1}
// CHECK: vpscatterdd %ymm28, 1024(%rcx,%ymm31,4) {%k1}
// CHECK: encoding: [0x62,0x22,0x7d,0x21,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00]
vpscatterdd %ymm28, 1024(%rcx,%ymm31,4) {%k1}
// CHECK: vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa0,0xac,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa0,0xac,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterdq %xmm21, 256(%r9,%xmm31) {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa0,0x6c,0x39,0x20]
vpscatterdq %xmm21, 256(%r9,%xmm31) {%k1}
// CHECK: vpscatterdq %xmm21, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0xa0,0xac,0xb9,0x00,0x04,0x00,0x00]
vpscatterdq %xmm21, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterdq %ymm28, 256(%r9,%xmm31) {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa0,0x64,0x39,0x20]
vpscatterdq %ymm28, 256(%r9,%xmm31) {%k1}
// CHECK: vpscatterdq %ymm28, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00]
vpscatterdq %ymm28, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa1,0xb4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa1,0xb4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterqd %xmm22, 256(%r9,%xmm31) {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0xa1,0x74,0x39,0x40]
vpscatterqd %xmm22, 256(%r9,%xmm31) {%k1}
// CHECK: vpscatterqd %xmm22, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0xa1,0xb4,0xb9,0x00,0x04,0x00,0x00]
vpscatterqd %xmm22, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa1,0x84,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
// CHECK: vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa1,0x84,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
// CHECK: vpscatterqd %xmm24, 256(%r9,%ymm31) {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa1,0x44,0x39,0x40]
vpscatterqd %xmm24, 256(%r9,%ymm31) {%k1}
// CHECK: vpscatterqd %xmm24, 1024(%rcx,%ymm31,4) {%k1}
// CHECK: encoding: [0x62,0x22,0x7d,0x21,0xa1,0x84,0xb9,0x00,0x04,0x00,0x00]
vpscatterqd %xmm24, 1024(%rcx,%ymm31,4) {%k1}
// CHECK: vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x01,0xa1,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x01,0xa1,0xa4,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
// CHECK: vpscatterqq %xmm28, 256(%r9,%xmm31) {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x01,0xa1,0x64,0x39,0x20]
vpscatterqq %xmm28, 256(%r9,%xmm31) {%k1}
// CHECK: vpscatterqq %xmm28, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: encoding: [0x62,0x22,0xfd,0x01,0xa1,0xa4,0xb9,0x00,0x04,0x00,0x00]
vpscatterqq %xmm28, 1024(%rcx,%xmm31,4) {%k1}
// CHECK: vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa1,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
// CHECK: vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa1,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
// CHECK: vpscatterqq %ymm19, 256(%r9,%ymm31) {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa1,0x5c,0x39,0x20]
vpscatterqq %ymm19, 256(%r9,%ymm31) {%k1}
// CHECK: vpscatterqq %ymm19, 1024(%rcx,%ymm31,4) {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x21,0xa1,0x9c,0xb9,0x00,0x04,0x00,0x00]
vpscatterqq %ymm19, 1024(%rcx,%ymm31,4) {%k1}