AVX512: Implemented encoding and intrinsics for vpalignr

Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D12270

llvm-svn: 246428
Igor Breger 2015-08-31 11:14:02 +00:00
parent 9f3d55cf3d
commit 2ae0fe3ac3
11 changed files with 419 additions and 46 deletions
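
For context: within each 128-bit lane, PALIGNR concatenates its two source vectors and shifts the pair right by the immediate number of bytes. A minimal LLVM IR sketch (an annotation, not part of this patch) of a shuffle the byte-rotate lowering can now select as a single EVEX vpalignr under AVX512BW; it mirrors shuffle_v16i8_vpalignr in the tests below, which takes the last byte of %b followed by the first 15 bytes of %a:

define <16 x i8> @palignr_sketch(<16 x i8> %a, <16 x i8> %b) {
; picks b[15], a[0..14] = bytes 15..30 of the a:b concatenation (a high),
; i.e. a lane-local byte rotation matching "vpalignr $15"
  %r = shufflevector <16 x i8> %a, <16 x i8> %b,
         <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                     i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %r
}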


@@ -5632,15 +5632,35 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 }
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">,
+  def int_x86_avx512_mask_valign_q_512 :
+          GCCBuiltin<"__builtin_ia32_alignq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
-                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
+                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
+                     llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">,
+  def int_x86_avx512_mask_valign_d_512 :
+          GCCBuiltin<"__builtin_ia32_alignd512_mask">,
           Intrinsic<[llvm_v16i32_ty],
-                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
+                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+                     llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_palignr_128 :
+          GCCBuiltin<"__builtin_ia32_palignr128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty,
+                     llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_palignr_256 :
+          GCCBuiltin<"__builtin_ia32_palignr256_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+                    [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v32i8_ty,
+                     llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_palignr_512 :
+          GCCBuiltin<"__builtin_ia32_palignr512_mask">,
+          Intrinsic<[llvm_v64i8_ty],
+                    [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v64i8_ty,
+                     llvm_i64_ty], [IntrNoMem]>;
 }
 // Compares

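The new declarations are used from IR as below: a minimal sketch (not part of the diff) mirroring the avx512bw-intrinsics.ll test added further down. Note the immediate is now carried as i32 and truncated to the i8 operand of the PALIGNR node during lowering (see the X86ISelLowering.cpp hunk that follows):

declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
define <64 x i8> @palignr_512_masked(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passthru, i64 %mask) {
; shifts each 16-byte lane of the a:b pair right by 2 bytes; result bytes
; whose mask bit is clear are taken from %passthru
  %r = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %a,
              <64 x i8> %b, i32 2, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %r
}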

@@ -6902,7 +6902,7 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
   Hi = DAG.getBitcast(AlignVT, Hi);
   return DAG.getBitcast(
-      VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+      VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi,
                       DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
 }
@@ -15695,12 +15695,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                   Src1, Src2, Imm, Rnd),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case INTR_TYPE_3OP_IMM8_MASK:
     case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
       SDValue PassThru = Op.getOperand(4);
       SDValue Mask = Op.getOperand(5);
+      if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
+        Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
       // (IntrData->Opc1 != 0), then we check the rounding mode operand.

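The new INTR_TYPE_3OP_IMM8_MASK path also serves the existing valign intrinsics, whose immediate operand widens from i8 to i32 in this patch. A minimal sketch against the updated declaration, matching test_valign_q in the avx512-intrinsics.ll hunk below:

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
define <8 x i64> @valign_q_sketch(<8 x i64> %a, <8 x i64> %b) {
; rotates the a:b concatenation right by two 64-bit elements; the all-ones
; mask (i8 -1) keeps every result element, so the zeroinitializer
; pass-through is unused
  %r = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a,
              <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %r
}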

@@ -6441,24 +6441,35 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
   }
 }
+//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
+//                               op(reg_vec2,mem_vec,imm)
+multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
+  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
+                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+                                       (SrcInfo.VT SrcInfo.RC:$src2),
+                                       (i8 imm:$src3)))>;
+  let mayLoad = 1 in
+    defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+                  (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
+                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+                                       (SrcInfo.VT (bitconvert
+                                                    (SrcInfo.LdFrag addr:$src2))),
+                                       (i8 imm:$src3)))>;
+}
 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
 //                               op(reg_vec2,mem_vec,imm)
 //                               op(reg_vec2,broadcast(eltVt),imm)
 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           X86VectorVTInfo _>{
-  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
-                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
-                  (OpNode (_.VT _.RC:$src1),
-                          (_.VT _.RC:$src2),
-                          (i8 imm:$src3))>;
-  let mayLoad = 1 in {
-    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
-                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
-                  (OpNode (_.VT _.RC:$src1),
-                          (_.VT (bitconvert (_.LdFrag addr:$src2))),
-                          (i8 imm:$src3))>;
+                           X86VectorVTInfo _>:
+  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+  let mayLoad = 1 in
     defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -6466,7 +6477,6 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                   (OpNode (_.VT _.RC:$src1),
                           (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                           (i8 imm:$src3))>, EVEX_B;
-  }
 }
 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -6542,6 +6552,20 @@ multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
   }
 }
+multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
+                    AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
+  let Predicates = [HasBWI] in {
+    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
+                       SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
+  }
+  let Predicates = [HasBWI, HasVLX] in {
+    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
+                       SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
+    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
+                       SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
+  }
+}
 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                   bits<8> opc, SDNode OpNode>{
   let Predicates = [HasAVX512] in {
@@ -6665,6 +6689,28 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
 defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_vpalign_lowering<X86VectorVTInfo _ , list<Predicate> p>{
+  let Predicates = p in
+    def NAME#_.VTName#rri:
+        Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
+            (!cast<Instruction>(NAME#_.ZSuffix#rri)
+                  _.RC:$src1, _.RC:$src2, imm:$imm)>;
+}
+multiclass avx512_vpalign_lowering_common<AVX512VLVectorVTInfo _>:
+       avx512_vpalign_lowering<_.info512, [HasBWI]>,
+       avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
+       avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
+defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
+                                        avx512vl_i8_info, avx512vl_i8_info>,
+              avx512_vpalign_lowering_common<avx512vl_i16_info>,
+              avx512_vpalign_lowering_common<avx512vl_i32_info>,
+              avx512_vpalign_lowering_common<avx512vl_f32_info>,
+              avx512_vpalign_lowering_common<avx512vl_i64_info>,
+              avx512_vpalign_lowering_common<avx512vl_f64_info>,
+              EVEX_CD8<8, CD8VF>;
 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _> {
   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),


@@ -5799,37 +5799,37 @@ let Predicates = [HasAVX2] in
 let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
   defm PALIGN : ssse3_palignr<"palignr">;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
 def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 }
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 }
 let Predicates = [UseSSSE3] in {
 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 }
 //===---------------------------------------------------------------------===//


@@ -23,7 +23,8 @@ enum IntrinsicType {
   CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
   INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
-  INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+  INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
+  FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
   VPERM_3OP_MASKZ,
   INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -753,6 +754,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
@@ -1199,9 +1206,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::UNPCKL, 0),
   X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
                      X86ISD::UNPCKL, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,


@@ -214,3 +214,38 @@ define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnon
   %x = sext <16 x i1> %bincmp to <16 x i8>
   ret <16 x i8> %x
 }
+define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  ret <8 x i16> %shuffle
+}
+define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+  ret <16 x i16> %shuffle
+}
+define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+  ret <16 x i8> %shuffle
+}
+define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <32 x i8> %shuffle
+}
+define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
+define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
+  ret <8 x i32> %shuffle
+}

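These masks all encode lane-local rotations, which is what lets the new avx512_vpalign_lowering patterns map non-byte element types onto the byte-granular instruction. A worked annotation (not a test line) for shuffle_v4i32_vpalignr above:

; mask <7, 0, 1, 2>  ->  b[3], a[0], a[1], a[2]
;                     =  bytes 12..27 of the 32-byte a:b concatenation (a high)
;                    ->  a byte rotation of 3 * 4 = 12, i.e. vpalignr immediate 12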

@@ -566,27 +566,27 @@ declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
 ; CHECK-LABEL: test_valign_q:
 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
-  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }
 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
 ; CHECK-LABEL: test_mask_valign_q:
 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
-  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
   ret <8 x i64> %res
 }
-declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
 ; CHECK-LABEL: test_maskz_valign_d:
 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
-  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
+  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
   ret <16 x i32> %res
 }
-declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
 ; CHECK-LABEL: test_mask_store_ss


@@ -1180,4 +1180,24 @@ define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   %res2 = add <32 x i16> %res, %res1
   ret <32 x i16> %res2
 }
+declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
+define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rdi, %k1
+; CHECK-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddb %zmm3, %zmm2, %zmm1
+; CHECK-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
+  %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
+  %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
+  %res3 = add <64 x i8> %res, %res1
+  %res4 = add <64 x i8> %res3, %res2
+  ret <64 x i8> %res4
+}


@@ -4194,3 +4194,43 @@ define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x
   %res2 = add <16 x i16> %res, %res1
   ret <16 x i16> %res2
 }
+declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)
+define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddb %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
+  %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
+  %res2 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
+  %res3 = add <16 x i8> %res, %res1
+  %res4 = add <16 x i8> %res3, %res2
+  ret <16 x i8> %res4
+}
+declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)
+define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddb %ymm3, %ymm2, %ymm1
+; CHECK-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
+  %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
+  %res2 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
+  %res3 = add <32 x i8> %res, %res1
+  %res4 = add <32 x i8> %res3, %res2
+  ret <32 x i8> %res4
+}


@@ -4112,3 +4112,43 @@
 // CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0xb2,0xc0,0xdf,0xff,0xff]
           vpunpckhwd -8256(%rdx), %zmm26, %zmm30
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xf1,0xab]
+          vpalignr $171, %zmm17, %zmm26, %zmm22
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x43,0x0f,0xf1,0xab]
+          vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3}
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xa3,0x2d,0xc3,0x0f,0xf1,0xab]
+          vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3} {z}
+// CHECK: vpalignr $123, %zmm17, %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xf1,0x7b]
+          vpalignr $123, %zmm17, %zmm26, %zmm22
+// CHECK: vpalignr $123, (%rcx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x31,0x7b]
+          vpalignr $123, (%rcx), %zmm26, %zmm22
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpalignr $123, 291(%rax,%r14,8), %zmm26, %zmm22
+// CHECK: vpalignr $123, 8128(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x72,0x7f,0x7b]
+          vpalignr $123, 8128(%rdx), %zmm26, %zmm22
+// CHECK: vpalignr $123, 8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0xb2,0x00,0x20,0x00,0x00,0x7b]
+          vpalignr $123, 8192(%rdx), %zmm26, %zmm22
+// CHECK: vpalignr $123, -8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x72,0x80,0x7b]
+          vpalignr $123, -8192(%rdx), %zmm26, %zmm22
+// CHECK: vpalignr $123, -8256(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+          vpalignr $123, -8256(%rdx), %zmm26, %zmm22


@@ -8399,3 +8399,163 @@
 // CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0xe0,0xef,0xff,0xff]
           vpunpckhwd -4128(%rdx), %ymm25, %ymm28
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0xab]
+          vpalignr $171, %xmm21, %xmm26, %xmm19
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x04,0x0f,0xdd,0xab]
+          vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4}
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x84,0x0f,0xdd,0xab]
+          vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4} {z}
+// CHECK: vpalignr $123, %xmm21, %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0x7b]
+          vpalignr $123, %xmm21, %xmm26, %xmm19
+// CHECK: vpalignr $123, (%rcx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x19,0x7b]
+          vpalignr $123, (%rcx), %xmm26, %xmm19
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpalignr $123, 291(%rax,%r14,8), %xmm26, %xmm19
+// CHECK: vpalignr $123, 2032(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x5a,0x7f,0x7b]
+          vpalignr $123, 2032(%rdx), %xmm26, %xmm19
+// CHECK: vpalignr $123, 2048(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x9a,0x00,0x08,0x00,0x00,0x7b]
+          vpalignr $123, 2048(%rdx), %xmm26, %xmm19
+// CHECK: vpalignr $123, -2048(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x5a,0x80,0x7b]
+          vpalignr $123, -2048(%rdx), %xmm26, %xmm19
+// CHECK: vpalignr $123, -2064(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+          vpalignr $123, -2064(%rdx), %xmm26, %xmm19
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0xde,0xab]
+          vpalignr $171, %ymm22, %ymm21, %ymm27
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2}
+// CHECK: encoding: [0x62,0x23,0x55,0x22,0x0f,0xde,0xab]
+          vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2}
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0x55,0xa2,0x0f,0xde,0xab]
+          vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2} {z}
+// CHECK: vpalignr $123, %ymm22, %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0xde,0x7b]
+          vpalignr $123, %ymm22, %ymm21, %ymm27
+// CHECK: vpalignr $123, (%rcx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x19,0x7b]
+          vpalignr $123, (%rcx), %ymm21, %ymm27
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpalignr $123, 291(%rax,%r14,8), %ymm21, %ymm27
+// CHECK: vpalignr $123, 4064(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x5a,0x7f,0x7b]
+          vpalignr $123, 4064(%rdx), %ymm21, %ymm27
+// CHECK: vpalignr $123, 4096(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vpalignr $123, 4096(%rdx), %ymm21, %ymm27
+// CHECK: vpalignr $123, -4096(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x5a,0x80,0x7b]
+          vpalignr $123, -4096(%rdx), %ymm21, %ymm27
+// CHECK: vpalignr $123, -4128(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vpalignr $123, -4128(%rdx), %ymm21, %ymm27
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x03,0x5d,0x00,0x0f,0xf1,0xab]
+          vpalignr $0xab, %xmm25, %xmm20, %xmm30
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30 {%k2}
+// CHECK: encoding: [0x62,0x03,0x5d,0x02,0x0f,0xf1,0xab]
+          vpalignr $0xab, %xmm25, %xmm20, %xmm30 {%k2}
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0x5d,0x82,0x0f,0xf1,0xab]
+          vpalignr $0xab, %xmm25, %xmm20, %xmm30 {%k2} {z}
+// CHECK: vpalignr $123, %xmm25, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x03,0x5d,0x00,0x0f,0xf1,0x7b]
+          vpalignr $0x7b, %xmm25, %xmm20, %xmm30
+// CHECK: vpalignr $123, (%rcx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x31,0x7b]
+          vpalignr $0x7b,(%rcx), %xmm20, %xmm30
+// CHECK: vpalignr $123, 4660(%rax,%r14,8), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x23,0x5d,0x00,0x0f,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vpalignr $0x7b,4660(%rax,%r14,8), %xmm20, %xmm30
+// CHECK: vpalignr $123, 2032(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x72,0x7f,0x7b]
+          vpalignr $0x7b,2032(%rdx), %xmm20, %xmm30
+// CHECK: vpalignr $123, 2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0xb2,0x00,0x08,0x00,0x00,0x7b]
+          vpalignr $0x7b,2048(%rdx), %xmm20, %xmm30
+// CHECK: vpalignr $123, -2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x72,0x80,0x7b]
+          vpalignr $0x7b,-2048(%rdx), %xmm20, %xmm30
+// CHECK: vpalignr $123, -2064(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+          vpalignr $0x7b,-2064(%rdx), %xmm20, %xmm30
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0x83,0x75,0x20,0x0f,0xeb,0xab]
+          vpalignr $0xab, %ymm27, %ymm17, %ymm21
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21 {%k7}
+// CHECK: encoding: [0x62,0x83,0x75,0x27,0x0f,0xeb,0xab]
+          vpalignr $0xab, %ymm27, %ymm17, %ymm21 {%k7}
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x75,0xa7,0x0f,0xeb,0xab]
+          vpalignr $0xab, %ymm27, %ymm17, %ymm21 {%k7} {z}
+// CHECK: vpalignr $123, %ymm27, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0x83,0x75,0x20,0x0f,0xeb,0x7b]
+          vpalignr $0x7b, %ymm27, %ymm17, %ymm21
+// CHECK: vpalignr $123, (%rcx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x29,0x7b]
+          vpalignr $0x7b,(%rcx), %ymm17, %ymm21
+// CHECK: vpalignr $123, 4660(%rax,%r14,8), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x75,0x20,0x0f,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vpalignr $0x7b,4660(%rax,%r14,8), %ymm17, %ymm21
+// CHECK: vpalignr $123, 4064(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x6a,0x7f,0x7b]
+          vpalignr $0x7b,4064(%rdx), %ymm17, %ymm21
+// CHECK: vpalignr $123, 4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0xaa,0x00,0x10,0x00,0x00,0x7b]
+          vpalignr $0x7b,4096(%rdx), %ymm17, %ymm21
+// CHECK: vpalignr $123, -4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x6a,0x80,0x7b]
+          vpalignr $0x7b,-4096(%rdx), %ymm17, %ymm21
+// CHECK: vpalignr $123, -4128(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+          vpalignr $0x7b,-4128(%rdx), %ymm17, %ymm21