AVX512: Implemented encoding and intrinsics for vpalignr

Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D12270

llvm-svn: 246428
Author: Igor Breger
Date:   2015-08-31 11:14:02 +00:00
Commit: 2ae0fe3ac3 (parent: 9f3d55cf3d)

11 changed files with 419 additions and 46 deletions
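For context: palignr concatenates each pair of 16-byte lanes of its two sources (first source in the upper half), shifts the concatenation right by the immediate byte count, and keeps the low 16 bytes per lane; the AVX-512 forms additionally merge or zero the byte result under a mask. A minimal IR sketch of the new 512-bit masked intrinsic (the function name is illustrative; the intrinsic signature is the one introduced by this patch):

; Sketch only: per 16-byte lane, the bytes of %a:%b are concatenated (%a in
; the high half), shifted right by 2 bytes, and the result is merged with
; %passthru under the 64-bit byte mask %mask.
declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)

define <64 x i8> @palignr_sketch(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passthru, i64 %mask) {
  %r = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %a, <64 x i8> %b, i32 2, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %r
}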

@@ -5632,15 +5632,35 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 }
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">,
-        Intrinsic<[llvm_v8i64_ty],
-        [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty],
-        [IntrNoMem]>;
-  def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">,
-        Intrinsic<[llvm_v16i32_ty],
-        [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty],
-        [IntrNoMem]>;
+  def int_x86_avx512_mask_valign_q_512 :
+        GCCBuiltin<"__builtin_ia32_alignq512_mask">,
+        Intrinsic<[llvm_v8i64_ty],
+                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_valign_d_512 :
+        GCCBuiltin<"__builtin_ia32_alignd512_mask">,
+        Intrinsic<[llvm_v16i32_ty],
+                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_palignr_128 :
+        GCCBuiltin<"__builtin_ia32_palignr128_mask">,
+        Intrinsic<[llvm_v16i8_ty],
+                  [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_palignr_256 :
+        GCCBuiltin<"__builtin_ia32_palignr256_mask">,
+        Intrinsic<[llvm_v32i8_ty],
+                  [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v32i8_ty,
+                   llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_palignr_512 :
+        GCCBuiltin<"__builtin_ia32_palignr512_mask">,
+        Intrinsic<[llvm_v64i8_ty],
+                  [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v64i8_ty,
+                   llvm_i64_ty], [IntrNoMem]>;
 }
 // Compares
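Note that the existing valign intrinsics above also widen their immediate operand from llvm_i8_ty to llvm_i32_ty, matching the new palignr intrinsics; the updated call sites appear in the test hunks later in this patch. A minimal sketch of a call against the new signature (the function name is illustrative):

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

; The element rotation count is now passed as i32 (previously i8); an
; all-ones mask (i8 -1) leaves every element unmasked.
define <8 x i64> @valign_sketch(<8 x i64> %a, <8 x i64> %b) {
  %r = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %r
}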

@@ -6902,7 +6902,7 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
   Hi = DAG.getBitcast(AlignVT, Hi);
   return DAG.getBitcast(
-      VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+      VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi,
                       DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
 }
@@ -15695,12 +15695,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                   Src1, Src2, Imm, Rnd),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case INTR_TYPE_3OP_IMM8_MASK:
     case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
       SDValue PassThru = Op.getOperand(4);
       SDValue Mask = Op.getOperand(5);
+
+      if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
+        Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
+
       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
       // (IntrData->Opc1 != 0), then we check the rounding mode operand.

@@ -6441,24 +6441,35 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
   }
 }

+//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
+//                               op(reg_vec2,mem_vec,imm)
+multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                      X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
+  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
+                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+                               (SrcInfo.VT SrcInfo.RC:$src2),
+                               (i8 imm:$src3)))>;
+  let mayLoad = 1 in
+    defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+                  (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
+                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+                               (SrcInfo.VT (bitconvert
+                                      (SrcInfo.LdFrag addr:$src2))),
+                               (i8 imm:$src3)))>;
+}
+
 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
 //                               op(reg_vec2,mem_vec,imm)
 //                               op(reg_vec2,broadcast(eltVt),imm)
 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                          X86VectorVTInfo _>{
-  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
-                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
-                  (OpNode (_.VT _.RC:$src1),
-                          (_.VT _.RC:$src2),
-                          (i8 imm:$src3))>;
-  let mayLoad = 1 in {
-    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
-                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
-                  (OpNode (_.VT _.RC:$src1),
-                          (_.VT (bitconvert (_.LdFrag addr:$src2))),
-                          (i8 imm:$src3))>;
+                          X86VectorVTInfo _>:
+  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+
+  let mayLoad = 1 in
     defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -6466,7 +6477,6 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                   (OpNode (_.VT _.RC:$src1),
                           (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                           (i8 imm:$src3))>, EVEX_B;
-  }
 }

 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -6542,6 +6552,20 @@ multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
   }
 }

+multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
+                    AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
+  let Predicates = [HasBWI] in {
+    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
+                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
+  }
+  let Predicates = [HasBWI, HasVLX] in {
+    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
+                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
+    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
+                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
+  }
+}
+
 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                 bits<8> opc, SDNode OpNode>{
   let Predicates = [HasAVX512] in {
@@ -6665,6 +6689,28 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
 defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;

+multiclass avx512_vpalign_lowering<X86VectorVTInfo _ , list<Predicate> p>{
+  let Predicates = p in
+    def NAME#_.VTName#rri:
+        Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
+            (!cast<Instruction>(NAME#_.ZSuffix#rri)
+                  _.RC:$src1, _.RC:$src2, imm:$imm)>;
+}
+
+multiclass avx512_vpalign_lowering_common<AVX512VLVectorVTInfo _>:
+        avx512_vpalign_lowering<_.info512, [HasBWI]>,
+        avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
+        avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
+
+defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
+                                        avx512vl_i8_info, avx512vl_i8_info>,
+              avx512_vpalign_lowering_common<avx512vl_i16_info>,
+              avx512_vpalign_lowering_common<avx512vl_i32_info>,
+              avx512_vpalign_lowering_common<avx512vl_f32_info>,
+              avx512_vpalign_lowering_common<avx512vl_i64_info>,
+              avx512_vpalign_lowering_common<avx512vl_f64_info>,
+              EVEX_CD8<8, CD8VF>;
+
 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _> {
   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),

@@ -5799,37 +5799,37 @@ let Predicates = [HasAVX2] in
 let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
   defm PALIGN : ssse3_palignr<"palignr">;

-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
 def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
-          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+          (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
 }

-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 }

 let Predicates = [UseSSSE3] in {
 def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
-          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+          (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
 }

 //===---------------------------------------------------------------------===//
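The pattern updates above follow from the swapped X86PAlignr operand order in lowerVectorShuffleAsByteRotate, so each Pat now passes $src1, $src2 through in source order; the VEX patterns also gain NoVLX_Or_NoBWI so the new EVEX forms take precedence when BWI and VLX are available. The kind of shuffle these patterns match, as exercised by the new CodeGen tests below (the function name is illustrative):

; Elements 4..7 of %a followed by elements 0..3 of %b: a right shift of the
; byte concatenation %b:%a by 8 bytes, i.e. palignr $8.
define <8 x i16> @palignr_shuffle_sketch(<8 x i16> %a, <8 x i16> %b) {
  %s = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %s
}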

@@ -23,7 +23,8 @@ enum IntrinsicType {
   CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
   INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
-  INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+  INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
+  FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
   VPERM_3OP_MASKZ,
   INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -753,6 +754,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
@@ -1199,9 +1206,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::UNPCKL, 0),
   X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
                      X86ISD::UNPCKL, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,

@@ -214,3 +214,38 @@ define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnon
   %x = sext <16 x i1> %bincmp to <16 x i8>
   ret <16 x i8> %x
 }
+
+define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  ret <8 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+  ret <16 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <32 x i8> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
+  ret <8 x i32> %shuffle
+}

@@ -566,27 +566,27 @@ declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
 ; CHECK-LABEL: test_valign_q:
 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
-  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }

 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
 ; CHECK-LABEL: test_mask_valign_q:
 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
-  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
   ret <8 x i64> %res
 }

-declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
 ; CHECK-LABEL: test_maskz_valign_d:
 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
-  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
+  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
   ret <16 x i32> %res
 }

-declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
 ; CHECK-LABEL: test_mask_store_ss

@@ -1180,4 +1180,24 @@ define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   %res2 = add <32 x i16> %res, %res1
   ret <32 x i16> %res2
 }
+
+declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
+  %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
+  %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
+  %res3 = add <64 x i8> %res, %res1
+  %res4 = add <64 x i8> %res3, %res2
+  ret <64 x i8> %res4
+}

@@ -4194,3 +4194,43 @@ define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x
   %res2 = add <16 x i16> %res, %res1
   ret <16 x i16> %res2
 }
+
+declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
+  %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
+  %res2 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
+  %res3 = add <16 x i8> %res, %res1
+  %res4 = add <16 x i8> %res3, %res2
+  ret <16 x i8> %res4
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
+  %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
+  %res2 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
+  %res3 = add <32 x i8> %res, %res1
+  %res4 = add <32 x i8> %res3, %res2
+  ret <32 x i8> %res4
+}

@@ -4112,3 +4112,43 @@
 // CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0xb2,0xc0,0xdf,0xff,0xff]
 vpunpckhwd -8256(%rdx), %zmm26, %zmm30
+
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xf1,0xab]
+vpalignr $171, %zmm17, %zmm26, %zmm22
+
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x43,0x0f,0xf1,0xab]
+vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3}
+
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xa3,0x2d,0xc3,0x0f,0xf1,0xab]
+vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3} {z}
+
+// CHECK: vpalignr $123, %zmm17, %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xf1,0x7b]
+vpalignr $123, %zmm17, %zmm26, %zmm22
+
+// CHECK: vpalignr $123, (%rcx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x31,0x7b]
+vpalignr $123, (%rcx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+vpalignr $123, 291(%rax,%r14,8), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, 8128(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x72,0x7f,0x7b]
+vpalignr $123, 8128(%rdx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, 8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0xb2,0x00,0x20,0x00,0x00,0x7b]
+vpalignr $123, 8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, -8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x72,0x80,0x7b]
+vpalignr $123, -8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, -8256(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+vpalignr $123, -8256(%rdx), %zmm26, %zmm22

@@ -8399,3 +8399,163 @@
 // CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0xe0,0xef,0xff,0xff]
 vpunpckhwd -4128(%rdx), %ymm25, %ymm28
+
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0xab]
+vpalignr $171, %xmm21, %xmm26, %xmm19
+
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x04,0x0f,0xdd,0xab]
+vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4}
+
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x84,0x0f,0xdd,0xab]
+vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4} {z}
+
+// CHECK: vpalignr $123, %xmm21, %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0x7b]
+vpalignr $123, %xmm21, %xmm26, %xmm19
+
+// CHECK: vpalignr $123, (%rcx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x19,0x7b]
+vpalignr $123, (%rcx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+vpalignr $123, 291(%rax,%r14,8), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, 2032(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x5a,0x7f,0x7b]
+vpalignr $123, 2032(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, 2048(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x9a,0x00,0x08,0x00,0x00,0x7b]
+vpalignr $123, 2048(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, -2048(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x5a,0x80,0x7b]
+vpalignr $123, -2048(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, -2064(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+vpalignr $123, -2064(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0xde,0xab]
+vpalignr $171, %ymm22, %ymm21, %ymm27
+
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2}
+// CHECK: encoding: [0x62,0x23,0x55,0x22,0x0f,0xde,0xab]
+vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2}
+
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0x55,0xa2,0x0f,0xde,0xab]
+vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2} {z}
+
+// CHECK: vpalignr $123, %ymm22, %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0xde,0x7b]
+vpalignr $123, %ymm22, %ymm21, %ymm27
+
+// CHECK: vpalignr $123, (%rcx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x19,0x7b]
+vpalignr $123, (%rcx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+vpalignr $123, 291(%rax,%r14,8), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, 4064(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x5a,0x7f,0x7b]
+vpalignr $123, 4064(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, 4096(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x9a,0x00,0x10,0x00,0x00,0x7b]
+vpalignr $123, 4096(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, -4096(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x5a,0x80,0x7b]
+vpalignr $123, -4096(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, -4128(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+vpalignr $123, -4128(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x03,0x5d,0x00,0x0f,0xf1,0xab]
+vpalignr $0xab, %xmm25, %xmm20, %xmm30
+
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30 {%k2}
+// CHECK: encoding: [0x62,0x03,0x5d,0x02,0x0f,0xf1,0xab]
+vpalignr $0xab, %xmm25, %xmm20, %xmm30 {%k2}
+
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0x5d,0x82,0x0f,0xf1,0xab]
+vpalignr $0xab, %xmm25, %xmm20, %xmm30 {%k2} {z}
+
+// CHECK: vpalignr $123, %xmm25, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x03,0x5d,0x00,0x0f,0xf1,0x7b]
+vpalignr $0x7b, %xmm25, %xmm20, %xmm30
+
+// CHECK: vpalignr $123, (%rcx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x31,0x7b]
+vpalignr $0x7b,(%rcx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, 4660(%rax,%r14,8), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x23,0x5d,0x00,0x0f,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+vpalignr $0x7b,4660(%rax,%r14,8), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, 2032(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x72,0x7f,0x7b]
+vpalignr $0x7b,2032(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, 2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0xb2,0x00,0x08,0x00,0x00,0x7b]
+vpalignr $0x7b,2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, -2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x72,0x80,0x7b]
+vpalignr $0x7b,-2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, -2064(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+vpalignr $0x7b,-2064(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0x83,0x75,0x20,0x0f,0xeb,0xab]
+vpalignr $0xab, %ymm27, %ymm17, %ymm21
+
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21 {%k7}
+// CHECK: encoding: [0x62,0x83,0x75,0x27,0x0f,0xeb,0xab]
+vpalignr $0xab, %ymm27, %ymm17, %ymm21 {%k7}
+
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x75,0xa7,0x0f,0xeb,0xab]
+vpalignr $0xab, %ymm27, %ymm17, %ymm21 {%k7} {z}
+
+// CHECK: vpalignr $123, %ymm27, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0x83,0x75,0x20,0x0f,0xeb,0x7b]
+vpalignr $0x7b, %ymm27, %ymm17, %ymm21
+
+// CHECK: vpalignr $123, (%rcx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x29,0x7b]
+vpalignr $0x7b,(%rcx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, 4660(%rax,%r14,8), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x75,0x20,0x0f,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+vpalignr $0x7b,4660(%rax,%r14,8), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, 4064(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x6a,0x7f,0x7b]
+vpalignr $0x7b,4064(%rdx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, 4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0xaa,0x00,0x10,0x00,0x00,0x7b]
+vpalignr $0x7b,4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, -4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x6a,0x80,0x7b]
+vpalignr $0x7b,-4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, -4128(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+vpalignr $0x7b,-4128(%rdx), %ymm17, %ymm21