[X86] Remove and autoupgrade the expandload and compressstore intrinsics.

We use the target-independent intrinsics (llvm.masked.expandload and llvm.masked.compressstore) now.

llvm-svn: 334381
This commit is contained in:
Craig Topper 2018-06-11 01:25:22 +00:00
parent 08f5c7b8c3
commit e71ad1f6d0
10 changed files with 1181 additions and 1817 deletions

View File

@ -4613,25 +4613,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_compress_store_ps_512 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_pd_512 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_ps_256 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_pd_256 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_ps_128 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_pd_128 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_d_512 :
GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
@ -4657,25 +4638,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_compress_store_d_512 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty,
llvm_i16_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_q_512 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_d_256 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_q_256 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_d_128 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_q_128 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_b_512 :
GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
@ -4701,25 +4663,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_compress_store_b_512 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty,
llvm_i64_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_w_512 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty,
llvm_i32_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_b_256 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty,
llvm_i32_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_w_256 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty,
llvm_i16_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_b_128 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty,
llvm_i16_ty], [IntrArgMemOnly]>;
def int_x86_avx512_mask_compress_store_w_128 : // FIXME: remove
Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrArgMemOnly]>;
// expand
def int_x86_avx512_mask_expand_ps_512 :
GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
@ -4746,25 +4689,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_load_ps_512 : // FIXME: remove
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_pd_512 : // FIXME: remove
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_ps_256 : // FIXME: remove
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_pd_256 : // FIXME: remove
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_ps_128 : // FIXME: remove
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_pd_128 : // FIXME: remove
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_d_512 :
GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
@ -4790,25 +4714,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_load_d_512 : // FIXME: remove
Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty,
llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_q_512 : // FIXME: remove
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_d_256 : // FIXME: remove
Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_q_256 : // FIXME: remove
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_d_128 : // FIXME: remove
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_q_128 : // FIXME: remove
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_b_512 :
GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
@ -4833,25 +4738,6 @@ let TargetPrefix = "x86" in {
GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_expand_load_b_512 : // FIXME: remove
Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty, llvm_v64i8_ty,
llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_w_512 : // FIXME: remove
Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty, llvm_v32i16_ty,
llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_b_256 : // FIXME: remove
Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_v32i8_ty,
llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_w_256 : // FIXME: remove
Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_v16i16_ty,
llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_b_128 : // FIXME: remove
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_v16i8_ty,
llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_x86_avx512_mask_expand_load_w_128 : // FIXME: remove
Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
}
// VBMI2 Concat & Shift

View File

@ -302,6 +302,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "avx512.mask.store.ss" || // Added in 7.0
Name.startswith("avx512.mask.loadu.") || // Added in 3.9
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
@ -1659,6 +1661,36 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),CI->getArgOperand(2),
/*Aligned*/true);
} else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
  // Rewrite the legacy x86 expand-load call (pointer, pass-through vector,
  // integer mask) as a call to the generic masked_expandload intrinsic.
  Type *VecTy = CI->getType();
  Type *EltTy = VecTy->getVectorElementType();
  unsigned NumElts = VecTy->getVectorNumElements();

  // The replacement call takes a pointer to the vector's element type, so
  // bitcast the incoming pointer operand first.
  Value *EltPtr = Builder.CreateBitCast(CI->getArgOperand(0),
                                        llvm::PointerType::getUnqual(EltTy));
  // Turn the mask operand into the form getX86MaskVec produces for a vector
  // with NumElts elements.
  Value *Mask = getX86MaskVec(Builder, CI->getArgOperand(2), NumElts);
  Value *PassThru = CI->getArgOperand(1);

  Function *ExpandLoad = Intrinsic::getDeclaration(
      F->getParent(), Intrinsic::masked_expandload, VecTy);
  // Operand order of the replacement call: pointer, mask, pass-through.
  Rep = Builder.CreateCall(ExpandLoad, {EltPtr, Mask, PassThru});
} else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
  // Rewrite the legacy x86 compress-store call (pointer, data vector,
  // integer mask) as a call to the generic masked_compressstore intrinsic.
  Value *Data = CI->getArgOperand(1);
  // The call itself returns nothing; the vector type comes from the data
  // operand being stored.
  Type *DataTy = Data->getType();
  Type *EltTy = DataTy->getVectorElementType();
  unsigned NumElts = DataTy->getVectorNumElements();

  // The replacement call takes a pointer to the vector's element type, so
  // bitcast the incoming pointer operand first.
  Value *EltPtr = Builder.CreateBitCast(CI->getArgOperand(0),
                                        llvm::PointerType::getUnqual(EltTy));
  // Turn the mask operand into the form getX86MaskVec produces for a vector
  // with NumElts elements.
  Value *Mask = getX86MaskVec(Builder, CI->getArgOperand(2), NumElts);

  Function *CompressStore = Intrinsic::getDeclaration(
      F->getParent(), Intrinsic::masked_compressstore, DataTy);
  // Operand order of the replacement call: data, pointer, mask.
  Rep = Builder.CreateCall(CompressStore, {Data, EltPtr, Mask});
} else if (IsX86 && Name.startswith("xop.vpcom")) {
Intrinsic::ID intID;
if (Name.endswith("ub"))

View File

@ -4606,20 +4606,6 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.offset = 0;
switch (IntrData->Type) {
case EXPAND_FROM_MEM: {
Info.ptrVal = I.getArgOperand(0);
Info.memVT = MVT::getVT(I.getType());
Info.align = 1;
Info.flags |= MachineMemOperand::MOLoad;
break;
}
case COMPRESS_TO_MEM: {
Info.ptrVal = I.getArgOperand(0);
Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
Info.align = 1;
Info.flags |= MachineMemOperand::MOStore;
break;
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
@ -21577,27 +21563,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDValue Results[] = { SetCC, Store };
return DAG.getMergeValues(Results, dl);
}
case COMPRESS_TO_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue DataToCompress = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MVT VT = DataToCompress.getSimpleValueType();
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
if (isAllOnesConstant(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,
MemIntr->getMemOperand());
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedStore(Chain, dl, DataToCompress, Addr, VMask, VT,
MemIntr->getMemOperand(),
false /* truncating */, true /* compressing */);
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
@ -21641,28 +21606,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
llvm_unreachable("Unsupported truncstore intrinsic");
}
}
case EXPAND_FROM_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MVT VT = Op.getSimpleValueType();
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
if (isAllOnesConstant(Mask)) // Return a regular (unmasked) vector load.
return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand());
if (X86::isZeroNode(Mask))
return DAG.getMergeValues({PassThru, Chain}, dl);
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT,
MemIntr->getMemOperand(), ISD::NON_EXTLOAD,
true /* expanding */);
}
}
}

View File

@ -33,9 +33,8 @@ enum IntrinsicType : uint16_t {
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
COMPRESS_EXPAND_IN_REG,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM,
FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
ROUNDP, ROUNDS
@ -120,78 +119,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
X86_INTRINSIC_DATA(avx512_mask_compress_store_b_128,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_b_256,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_b_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_128,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_256,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_128,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_256,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_128,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_256,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_128,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_256,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_w_128,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_w_256,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_w_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_b_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_b_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_b_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_pd_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_ps_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_w_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_w_256,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_w_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,

View File

@ -7802,3 +7802,399 @@ define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x flo
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
; X86-LABEL: test_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret void
}
define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret void
}
define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret void
}
define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret void
}
define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
; X86-LABEL: test_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
}
; Make sure we don't crash when a constant 0 is passed as the mask.
define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_zero_mask_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_zero_mask_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
ret <8 x double> %res
}
define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret <16 x float> %res
}
define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret <8 x i64> %res
}
define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret <16 x i32> %res
}

View File

@ -2,19 +2,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s
; Compress-store of <8 x double> under a variable i8 mask: the expected code
; moves the mask into %k1 and emits a masked vcompresspd to memory.
define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_mask_compress_store_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret void
}
; Intrinsic called above: takes a pointer, the data vector and an i8 mask.
declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_pd_512:
; CHECK: ## %bb.0:
@ -46,29 +33,6 @@ define <8 x double> @test_compress_pd_512(<8 x double> %data) {
; Compress intrinsic without a pointer operand: data, pass-through source, i8 mask.
declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
; Compress-store of <8 x double> with a constant all-ones mask (i8 -1): the
; expected code is a plain unmasked vmovups store.
define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
; CHECK-LABEL: test_compress_store_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret void
}
; Compress-store of <16 x float> under a variable i16 mask: the expected code
; moves the mask into %k1 and emits a masked vcompressps to memory.
define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_mask_compress_store_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret void
}
; Intrinsic called above: takes a pointer, the data vector and an i16 mask.
declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_ps_512:
; CHECK: ## %bb.0:
@ -100,29 +64,6 @@ define <16 x float> @test_compress_ps_512(<16 x float> %data) {
; Compress intrinsic without a pointer operand: data, pass-through source, i16 mask.
declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
; Compress-store of <16 x float> with a constant all-ones mask (i16 -1): the
; expected code is a plain unmasked vmovups store.
define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
; CHECK-LABEL: test_compress_store_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret void
}
; Compress-store of <8 x i64> under a variable i8 mask: the expected code
; moves the mask into %k1 and emits a masked vpcompressq to memory.
define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_compress_store_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret void
}
; Intrinsic called above: takes a pointer, the data vector and an i8 mask.
declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_q_512:
; CHECK: ## %bb.0:
@ -154,29 +95,6 @@ define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
; Compress intrinsic without a pointer operand: data, pass-through source, i8 mask.
declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
; Compress-store of <8 x i64> with a constant all-ones mask (i8 -1): the
; expected code is a plain unmasked vmovups store.
define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
; CHECK-LABEL: test_compress_store_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret void
}
; Compress-store of <16 x i32> under a variable i16 mask: the expected code
; moves the mask into %k1 and emits a masked vpcompressd to memory.
define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_mask_compress_store_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpcompressd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret void
}
; Intrinsic called above: takes a pointer, the data vector and an i16 mask.
declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_d_512:
; CHECK: ## %bb.0:
@ -208,38 +126,6 @@ define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
; Compress intrinsic without a pointer operand: data, pass-through source, i16 mask.
declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
; Compress-store of <16 x i32> with a constant all-ones mask (i16 -1): the
; expected code is a plain unmasked vmovups store.
define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
; CHECK-LABEL: test_compress_store_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret void
}
; Merge-masking expand-load of <8 x double>: %data is the pass-through and the
; i8 mask is variable, so the expected code is a masked vexpandpd from memory.
define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_mask_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret <8 x double> %res
}
; Zero-masking expand-load of <8 x double>: the pass-through is
; zeroinitializer, so the expected vexpandpd carries the {z} modifier.
define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i8 mask.
declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
define <8 x double> @test_expand_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_expand_pd_512:
; CHECK: ## %bb.0:
@ -271,46 +157,6 @@ define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i8 mask.
declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
; Expand-load of <8 x double> with a constant all-ones mask (i8 -1): the
; expected code is a plain unmasked vmovups load.
define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
; CHECK-LABEL: test_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
}
; Make sure we don't crash if you pass 0 to the mask.
; With the constant i8 0 mask the expected code is only a return, i.e. no
; load is emitted and the pass-through %data is returned as-is in %zmm0.
; The %mask parameter is not used by the body.
define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_zero_mask_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
ret <8 x double> %res
}
; Merge-masking expand-load of <16 x float>: %data is the pass-through and the
; i16 mask is variable, so the expected code is a masked vexpandps from memory.
define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_mask_expand_load_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret <16 x float> %res
}
; Zero-masking expand-load of <16 x float>: the pass-through is
; zeroinitializer, so the expected vexpandps carries the {z} modifier.
define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_load_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i16 mask.
declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK: ## %bb.0:
@ -342,37 +188,6 @@ define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i16 mask.
declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
; Expand-load of <16 x float> with a constant all-ones mask (i16 -1): the
; expected code is a plain unmasked vmovups load.
define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
; CHECK-LABEL: test_expand_load_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret <16 x float> %res
}
; Merge-masking expand-load of <8 x i64>: %data is the pass-through and the
; i8 mask is variable, so the expected code is a masked vpexpandq from memory.
define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_expand_load_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret <8 x i64> %res
}
; Zero-masking expand-load of <8 x i64>: the pass-through is zeroinitializer,
; so the expected vpexpandq carries the {z} modifier.
define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_load_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i8 mask.
declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK: ## %bb.0:
@ -404,37 +219,6 @@ define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i8 mask.
declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
; Expand-load of <8 x i64> with a constant all-ones mask (i8 -1): the
; expected code is a plain unmasked vmovups load.
define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
; CHECK-LABEL: test_expand_load_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret <8 x i64> %res
}
; Merge-masking expand-load of <16 x i32>: %data is the pass-through and the
; i16 mask is variable, so the expected code is a masked vpexpandd from memory.
define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_mask_expand_load_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpexpandd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret <16 x i32> %res
}
; Zero-masking expand-load of <16 x i32>: the pass-through is zeroinitializer,
; so the expected vpexpandd carries the {z} modifier.
define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_load_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpexpandd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i16 mask.
declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK: ## %bb.0:
@ -466,15 +250,6 @@ define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i16 mask.
declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
; Expand-load of <16 x i32> with a constant all-ones mask (i16 -1): the
; expected code is a plain unmasked vmovups load.
define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
; CHECK-LABEL: test_expand_load_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret <16 x i32> %res
}
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK: ## %bb.0:

View File

@ -2,42 +2,6 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
; Merge-masking expand-load of <32 x i16> with a variable i32 mask: the
; expected code is a masked vpexpandw from memory on both i686 and x86_64.
define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
ret <32 x i16> %res
}
; Zero-masking expand-load of <32 x i16>: the pass-through is zeroinitializer,
; so the expected vpexpandw carries the {z} modifier on both targets.
define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK: # %bb.0:
@ -82,61 +46,6 @@ define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
; Expand-load of <32 x i16> with a constant all-ones mask (i32 -1): the
; expected code is a plain unmasked vmovups load on both targets.
define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 -1)
ret <32 x i16> %res
}
; Merge-masking expand-load of <64 x i8> with a variable i64 mask. On i686 the
; expected code assembles the mask from two 32-bit kmovd loads joined by
; kunpckdq; on x86_64 a single kmovq is expected. Both then emit vpexpandb.
define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
ret <64 x i8> %res
}
; Zero-masking expand-load of <64 x i8> with a variable i64 mask: same mask
; setup as the merge-masking form (kmovd pair + kunpckdq on i686, kmovq on
; x86_64), with the {z} modifier on the expected vpexpandb.
define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i64 mask.
declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK: # %bb.0:
@ -185,191 +94,6 @@ define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i64 mask.
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
; Expand-load of <64 x i8> with a constant all-ones mask (i64 -1): the
; expected code is a plain unmasked vmovups load on both targets.
define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 -1)
ret <64 x i8> %res
}
; Compress-store of <32 x i16> under a variable i32 mask: the expected code is
; a masked vpcompressw to memory, followed by vzeroupper, on both targets.
define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
ret void
}
; Intrinsic called above: takes a pointer, the data vector and an i32 mask.
declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
; Register compress of <32 x i16> with a %passthru vector and a variable i32
; mask: the expected code compresses into %zmm1 (the pass-through register)
; under %k1 and then copies the result to %zmm0 with vmovdqa64.
define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
ret <32 x i16> %res
}
; Zero-masking register compress of <32 x i16>: the pass-through is
; zeroinitializer, so the expected vpcompressw operates in place on %zmm0
; with the {z} modifier.
define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
; Register compress of <32 x i16> with an undef pass-through and a constant
; all-ones mask (i32 -1): the expected code is only a return instruction,
; shared by both targets through the common prefix.
define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
ret <32 x i16> %res
}
; Intrinsic called above: data vector, pass-through source vector and i32 mask.
declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
; Compress-store of <32 x i16> with a constant all-ones mask (i32 -1): the
; expected code is a plain unmasked vmovups store on both targets.
define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %zmm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64: # %bb.0:
; X64-NEXT: vmovups %zmm0, (%rdi) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 -1)
ret void
}
; Compress-store of <64 x i8> under a variable i64 mask. On i686 the expected
; code builds the mask from two kmovd loads joined by kunpckdq; on x86_64 a
; single kmovq is expected. Both then emit a masked vpcompressb to memory.
define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
ret void
}
; Intrinsic called above: takes a pointer, the data vector and an i64 mask.
declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
; Register compress of <64 x i8> with a %passthru vector and a variable i64
; mask: the expected code compresses into %zmm1 under %k1 and then copies the
; result to %zmm0 with vmovdqa64. The i686 variant builds the 64-bit mask
; from two kmovd loads joined by kunpckdq.
define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
ret <64 x i8> %res
}
; Zero-masking register compress of <64 x i8>: the pass-through is
; zeroinitializer, so the expected vpcompressb operates in place on %zmm0
; with the {z} modifier.
define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64: # %bb.0:
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
; Register compress of <64 x i8> with an undef pass-through and a constant
; all-ones mask (i64 -1): the expected code is only a return instruction,
; shared by both targets through the common prefix.
define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
ret <64 x i8> %res
}
; Intrinsic called above: data vector, pass-through source vector and i64 mask.
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
; Compress-store of <64 x i8> with a constant all-ones mask (i64 -1): the
; expected code is a plain unmasked vmovups store on both targets.
define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %zmm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64: # %bb.0:
; X64-NEXT: vmovups %zmm0, (%rdi) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 -1)
ret void
}
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86: # %bb.0:

View File

@ -2,44 +2,6 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
; Merge-masking expand-load of <8 x i16> with a variable i8 mask. On i686 the
; expected code zero-extends the mask byte from the stack (movzbl) before
; moving it to %k1; both targets then emit a masked vpexpandw into %xmm0.
define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
ret <8 x i16> %res
}
; Zero-masking expand-load of <8 x i16>: the pass-through is zeroinitializer,
; so the expected vpexpandw carries the {z} modifier on both targets.
define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i8 mask.
declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK: # %bb.0:
@ -86,57 +48,6 @@ define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i8 mask.
declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
; Expand-load of <8 x i16> with a constant all-ones mask (i8 -1): the expected
; code is a plain unmasked vmovups load, annotated as EVEX-to-VEX compressed.
define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 -1)
ret <8 x i16> %res
}
; Merge-masking expand-load of <16 x i8> with a variable i16 mask: the
; expected code is a masked vpexpandb from memory into %xmm0 on both targets.
define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
ret <16 x i8> %res
}
; Zero-masking expand-load of <16 x i8>: the pass-through is zeroinitializer,
; so the expected vpexpandb carries the {z} modifier on both targets.
define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
; Intrinsic called above: takes a pointer, a pass-through vector and an i16 mask.
declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK: # %bb.0:
@ -181,41 +92,6 @@ define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; Expand intrinsic without a pointer operand: data, pass-through source, i16 mask.
declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
; Expand-load of <16 x i8> with a constant all-ones mask (i16 -1): the expected
; code is a plain unmasked vmovups load, annotated as EVEX-to-VEX compressed.
define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 -1)
ret <16 x i8> %res
}
; Compress-store of <8 x i16> under a variable i8 mask. On i686 the expected
; code zero-extends the mask byte from the stack (movzbl) before moving it to
; %k1; both targets then emit a masked vpcompressw to memory.
define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
ret void
}
; Intrinsic called above: takes a pointer, the data vector and an i8 mask.
declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_w_128:
; X86: # %bb.0:
@ -262,40 +138,6 @@ define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
; Compress-store of <8 x i16> with a constant all-ones mask (i8 -1).
; The assertions expect the compress to fold into a plain unmasked vmovups
; store to %addr.
define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 -1)
ret void
}
; Compress-store of <16 x i8> to %addr under a runtime i16 %mask.
; The assertions expect the mask to be loaded into %k1 and a single masked
; vpcompressb store to memory.
define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_b_128:
; X86: # %bb.0:
@ -340,57 +182,6 @@ define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
; Compress-store of <16 x i8> with a constant all-ones mask (i16 -1).
; The assertions expect the compress to fold into a plain unmasked vmovups
; store to %addr.
define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64: # %bb.0:
; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 -1)
ret void
}
; Expand-load of <16 x i16> from %addr under a runtime i16 %mask, with %data
; as the pass-through operand.
; The assertions expect a single merge-masked vpexpandw from memory into %ymm0.
define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
ret <16 x i16> %res
}
; Expand-load of <16 x i16> under a runtime i16 %mask with a zeroinitializer
; pass-through operand.
; The assertions expect the zero-masking ({z}) form of vpexpandw from memory.
define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
; Declaration of the x86-specific intrinsic called above.
declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK: # %bb.0:
@ -435,57 +226,6 @@ define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
; Expand-load of <16 x i16> with a constant all-ones mask (i16 -1).
; The assertions expect the expand to fold into a plain unmasked vmovups load
; from %addr.
define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 -1)
ret <16 x i16> %res
}
; Expand-load of <32 x i8> from %addr under a runtime i32 %mask, with %data
; as the pass-through operand.
; The assertions expect a single merge-masked vpexpandb from memory into %ymm0.
define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
ret <32 x i8> %res
}
; Expand-load of <32 x i8> under a runtime i32 %mask with a zeroinitializer
; pass-through operand.
; The assertions expect the zero-masking ({z}) form of vpexpandb from memory.
define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> zeroinitializer, i32 %mask)
ret <32 x i8> %res
}
; Declaration of the x86-specific intrinsic called above.
declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK: # %bb.0:
@ -530,42 +270,6 @@ define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
; Expand-load of <32 x i8> with a constant all-ones mask (i32 -1).
; The assertions expect the expand to fold into a plain unmasked vmovups load
; from %addr.
define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 -1)
ret <32 x i8> %res
}
; Compress-store of <16 x i16> to %addr under a runtime i16 %mask.
; The assertions expect a single masked vpcompressw store from %ymm0, followed
; by vzeroupper before returning.
define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_w_256:
; X86: # %bb.0:
@ -610,44 +314,6 @@ define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
; Compress-store of <16 x i16> with a constant all-ones mask (i16 -1).
; The assertions expect the compress to fold into a plain unmasked vmovups
; store of %ymm0, followed by vzeroupper.
define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 -1)
ret void
}
; Compress-store of <32 x i8> to %addr under a runtime i32 %mask.
; The assertions expect a single masked vpcompressb store from %ymm0, followed
; by vzeroupper before returning.
define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_b_256:
; X86: # %bb.0:
@ -692,23 +358,6 @@ define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
; Compress-store of <32 x i8> with a constant all-ones mask (i32 -1).
; The assertions expect the compress to fold into a plain unmasked vmovups
; store of %ymm0, followed by vzeroupper.
define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64: # %bb.0:
; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 -1)
ret void
}
define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86: # %bb.0:

View File

@ -11318,3 +11318,755 @@ define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
; Compress-store of <2 x double> to %addr under a runtime i8 %mask.
; The assertions expect the mask to be moved into %k1 with kmovw and a single
; masked vcompresspd store to memory.
define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
; Compress-store of <2 x double> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vcompresspd store to still be emitted.
define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_compress_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
ret void
}
; Compress-store of <4 x float> to %addr under a runtime i8 %mask.
; The assertions expect the mask to be moved into %k1 with kmovw and a single
; masked vcompressps store to memory.
define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
; Compress-store of <4 x float> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vcompressps store to still be emitted.
define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_compress_store_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
ret void
}
; Compress-store of <2 x i64> to %addr under a runtime i8 %mask.
; The assertions expect the mask to be moved into %k1 with kmovw and a single
; masked vpcompressq store to memory.
define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
; Compress-store of <2 x i64> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vpcompressq store to still be emitted.
define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_compress_store_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
ret void
}
; Compress-store of <4 x i32> to %addr under a runtime i8 %mask.
; The assertions expect the mask to be moved into %k1 with kmovw and a single
; masked vpcompressd store to memory.
define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
; Compress-store of <4 x i32> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vpcompressd store to still be emitted.
define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_compress_store_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
ret void
}
; Expand-load of <2 x double> from %addr under a runtime i8 %mask, with %data
; as the pass-through operand.
; The assertions expect a single merge-masked vexpandpd from memory into %xmm0.
define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
ret <2 x double> %res
}
; Expand-load of <2 x double> under a runtime i8 %mask with a zeroinitializer
; pass-through operand.
; The assertions expect the zero-masking ({z}) form of vexpandpd from memory.
define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
ret <2 x double> %res
}
; Declaration of the x86-specific intrinsic called above.
declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
; Expand-load of <2 x double> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vexpandpd load to still be emitted.
define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
ret <2 x double> %res
}
; Expand-load of <4 x float> from %addr under a runtime i8 %mask, with %data
; as the pass-through operand.
; The assertions expect a single merge-masked vexpandps from memory into %xmm0.
define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
ret <4 x float> %res
}
; Expand-load of <4 x float> under a runtime i8 %mask with a zeroinitializer
; pass-through operand.
; The assertions expect the zero-masking ({z}) form of vexpandps from memory.
define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}
; Declaration of the x86-specific intrinsic called above.
declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
; Expand-load of <4 x float> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vexpandps load to still be emitted.
define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
ret <4 x float> %res
}
; Expand-load of <2 x i64> from %addr under a runtime i8 %mask, with %data
; as the pass-through operand.
; The assertions expect a single merge-masked vpexpandq from memory into %xmm0.
define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
ret <2 x i64> %res
}
; Expand-load of <2 x i64> under a runtime i8 %mask with a zeroinitializer
; pass-through operand.
; The assertions expect the zero-masking ({z}) form of vpexpandq from memory.
define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}
; Declaration of the x86-specific intrinsic called above.
declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
; Expand-load of <2 x i64> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vpexpandq load to still be emitted.
define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
ret <2 x i64> %res
}
; Expand-load of <4 x i32> from %addr under a runtime i8 %mask, with %data
; as the pass-through operand.
; The assertions expect a single merge-masked vpexpandd from memory into %xmm0.
define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
ret <4 x i32> %res
}
; Expand-load of <4 x i32> under a runtime i8 %mask with a zeroinitializer
; pass-through operand.
; The assertions expect the zero-masking ({z}) form of vpexpandd from memory.
define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
; Declaration of the x86-specific intrinsic called above.
declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
; Expand-load of <4 x i32> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw and the masked vpexpandd load to still be emitted.
define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
ret <4 x i32> %res
}
; Compress-store of <4 x double> to %addr under a runtime i8 %mask.
; The assertions expect a single masked vcompresspd store from %ymm0, followed
; by vzeroupper before returning.
define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
; Compress-store of <4 x double> with a constant all-ones mask (i8 -1).
; The assertions expect the all-ones mask to be materialized in %k1 with
; kxnorw, the masked vcompresspd store to still be emitted, then vzeroupper.
define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_compress_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
ret void
}
; Compress-store of <8 x float> to %addr under a runtime i8 %mask.
; The assertions expect a single masked vcompressps store from %ymm0, followed
; by vzeroupper before returning.
define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
ret void
}
; Declaration of the x86-specific intrinsic called above.
declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
; Calls llvm.x86.avx512.mask.compress.store.ps.256 with a constant all-ones
; mask (i8 -1). The expected code keeps a masked vcompressps store of %ymm0,
; with %k1 set by kxnorw (xnor of a register with itself yields all ones).
define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_compress_store_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
ret void
}
; Calls llvm.x86.avx512.mask.compress.store.q.256 with the run-time i8 %mask
; argument. The expected code moves the mask into %k1 with kmovw and emits a
; single masked vpcompressq store of %ymm0, followed by vzeroupper.
define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
; Calls llvm.x86.avx512.mask.compress.store.q.256 with a constant all-ones
; mask (i8 -1). The expected code keeps a masked vpcompressq store of %ymm0,
; with %k1 set by kxnorw (xnor of a register with itself yields all ones).
define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_compress_store_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
ret void
}
; Calls llvm.x86.avx512.mask.compress.store.d.256 with the run-time i8 %mask
; argument. The expected code moves the mask into %k1 with kmovw and emits a
; single masked vpcompressd store of %ymm0, followed by vzeroupper.
define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
; Calls llvm.x86.avx512.mask.compress.store.d.256 with a constant all-ones
; mask (i8 -1). The expected code keeps a masked vpcompressd store of %ymm0,
; with %k1 set by kxnorw (xnor of a register with itself yields all ones).
define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_compress_store_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
ret void
}
; Calls llvm.x86.avx512.mask.expand.load.pd.256 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vexpandpd load into %ymm0 (no {z}).
define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
ret <4 x double> %res
}
; Calls llvm.x86.avx512.mask.expand.load.pd.256 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vexpandpd ({%k1} {z}) loading into %ymm0.
define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
; Calls llvm.x86.avx512.mask.expand.load.pd.256 with a constant all-ones mask
; (i8 -1). The expected code keeps a masked vexpandpd load into %ymm0, with
; %k1 set by kxnorw (xnor of a register with itself yields all ones).
define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_expand_load_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
ret <4 x double> %res
}
; Calls llvm.x86.avx512.mask.expand.load.ps.256 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vexpandps load into %ymm0 (no {z}).
define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
ret <8 x float> %res
}
; Calls llvm.x86.avx512.mask.expand.load.ps.256 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vexpandps ({%k1} {z}) loading into %ymm0.
define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
; Calls llvm.x86.avx512.mask.expand.load.ps.256 with a constant all-ones mask
; (i8 -1). The expected code keeps a masked vexpandps load into %ymm0, with
; %k1 set by kxnorw (xnor of a register with itself yields all ones).
define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
ret <8 x float> %res
}
; Calls llvm.x86.avx512.mask.expand.load.q.256 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vpexpandq load into %ymm0 (no {z}).
define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
ret <4 x i64> %res
}
; Calls llvm.x86.avx512.mask.expand.load.q.256 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vpexpandq ({%k1} {z}) loading into %ymm0.
define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
; Calls llvm.x86.avx512.mask.expand.load.q.256 with a constant all-ones mask
; (i8 -1). The expected code keeps a masked vpexpandq load into %ymm0, with
; %k1 set by kxnorw (xnor of a register with itself yields all ones).
define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
ret <4 x i64> %res
}
; Calls llvm.x86.avx512.mask.expand.load.d.256 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vpexpandd load into %ymm0 (no {z}).
define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
ret <8 x i32> %res
}
; Calls llvm.x86.avx512.mask.expand.load.d.256 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vpexpandd ({%k1} {z}) loading into %ymm0.
define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
; Calls llvm.x86.avx512.mask.expand.load.d.256 with a constant all-ones mask
; (i8 -1). The expected code keeps a masked vpexpandd load into %ymm0, with
; %k1 set by kxnorw (xnor of a register with itself yields all ones).
define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
ret <8 x i32> %res
}

View File

@ -2,26 +2,6 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
; Calls llvm.x86.avx512.mask.compress.store.pd.128 with the run-time i8 %mask
; argument. The expected code moves the mask into %k1 with kmovw and emits a
; single masked vcompresspd store of %xmm0.
define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_pd_128:
; X86: # %bb.0:
@ -68,41 +48,6 @@ define <2 x double> @test_compress_pd_128(<2 x double> %data) {
declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
; Calls llvm.x86.avx512.mask.compress.store.pd.128 with a constant all-ones
; mask (i8 -1). The expected code contains no mask register or compress
; instruction: just a plain vmovups store of %xmm0.
define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_compress_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
ret void
}
; Calls llvm.x86.avx512.mask.compress.store.ps.128 with the run-time i8 %mask
; argument. The expected code moves the mask into %k1 with kmovw and emits a
; single masked vcompressps store of %xmm0.
define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_ps_128:
; X86: # %bb.0:
@ -149,41 +94,6 @@ define <4 x float> @test_compress_ps_128(<4 x float> %data) {
declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
; Calls llvm.x86.avx512.mask.compress.store.ps.128 with a constant all-ones
; mask (i8 -1). The expected code contains no mask register or compress
; instruction: just a plain vmovups store of %xmm0.
define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_compress_store_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_128:
; X64: # %bb.0:
; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
ret void
}
; Calls llvm.x86.avx512.mask.compress.store.q.128 with the run-time i8 %mask
; argument. The expected code moves the mask into %k1 with kmovw and emits a
; single masked vpcompressq store of %xmm0.
define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_128:
; X86: # %bb.0:
@ -230,41 +140,6 @@ define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
; Calls llvm.x86.avx512.mask.compress.store.q.128 with a constant all-ones
; mask (i8 -1). The expected code contains no mask register or compress
; instruction: just a plain vmovups store of %xmm0.
define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_compress_store_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
ret void
}
; Calls llvm.x86.avx512.mask.compress.store.d.128 with the run-time i8 %mask
; argument. The expected code moves the mask into %k1 with kmovw and emits a
; single masked vpcompressd store of %xmm0.
define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_d_128:
; X86: # %bb.0:
@ -311,59 +186,6 @@ define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
; Calls llvm.x86.avx512.mask.compress.store.d.128 with a constant all-ones
; mask (i8 -1). The expected code contains no mask register or compress
; instruction: just a plain vmovups store of %xmm0.
define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_compress_store_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
ret void
}
; Calls llvm.x86.avx512.mask.expand.load.pd.128 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vexpandpd load into %xmm0 (no {z}).
define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
ret <2 x double> %res
}
; Calls llvm.x86.avx512.mask.expand.load.pd.128 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vexpandpd ({%k1} {z}) loading into %xmm0.
define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
define <2 x double> @test_expand_pd_128(<2 x double> %data) {
; CHECK-LABEL: test_expand_pd_128:
; CHECK: # %bb.0:
@ -410,59 +232,6 @@ define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
; Calls llvm.x86.avx512.mask.expand.load.pd.128 with a constant all-ones mask
; (i8 -1). The expected code contains no mask register or expand instruction:
; just a plain vmovups load into %xmm0.
define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
ret <2 x double> %res
}
; Calls llvm.x86.avx512.mask.expand.load.ps.128 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vexpandps load into %xmm0 (no {z}).
define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
ret <4 x float> %res
}
; Calls llvm.x86.avx512.mask.expand.load.ps.128 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vexpandps ({%k1} {z}) loading into %xmm0.
define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
define <4 x float> @test_expand_ps_128(<4 x float> %data) {
; CHECK-LABEL: test_expand_ps_128:
; CHECK: # %bb.0:
@ -509,59 +278,6 @@ define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
; Calls llvm.x86.avx512.mask.expand.load.ps.128 with a constant all-ones mask
; (i8 -1). The expected code contains no mask register or expand instruction:
; just a plain vmovups load into %xmm0.
define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
ret <4 x float> %res
}
; Calls llvm.x86.avx512.mask.expand.load.q.128 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vpexpandq load into %xmm0 (no {z}).
define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
ret <2 x i64> %res
}
; Calls llvm.x86.avx512.mask.expand.load.q.128 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vpexpandq ({%k1} {z}) loading into %xmm0.
define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
; CHECK-LABEL: test_expand_q_128:
; CHECK: # %bb.0:
@ -608,59 +324,6 @@ define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
; Calls llvm.x86.avx512.mask.expand.load.q.128 with a constant all-ones mask
; (i8 -1). The expected code contains no mask register or expand instruction:
; just a plain vmovups load into %xmm0.
define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
ret <2 x i64> %res
}
; Calls llvm.x86.avx512.mask.expand.load.d.128 with %data as the passthrough
; operand and a run-time i8 %mask. The expected code moves the mask into %k1
; with kmovw and emits a merge-masked vpexpandd load into %xmm0 (no {z}).
define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
ret <4 x i32> %res
}
; Calls llvm.x86.avx512.mask.expand.load.d.128 with a zeroinitializer
; passthrough and a run-time i8 %mask. The expected code uses the zero-masking
; form of vpexpandd ({%k1} {z}) loading into %xmm0.
define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
; CHECK-LABEL: test_expand_d_128:
; CHECK: # %bb.0:
@ -707,43 +370,6 @@ define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
; x86 masked expand intrinsic (vector operands only, no pointer) returning <4 x i32>; operands are two <4 x i32> vectors and an i8 mask.
declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
; Expand-load of <4 x i32> with a constant all-ones mask (i8 -1). Both check
; blocks expect a plain unmasked vmovups load into %xmm0, with no mask-register
; setup and no vpexpandd.
define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
ret <4 x i32> %res
}
; Masked compress-store of <4 x double> through the x86-specific
; @llvm.x86.avx512.mask.compress.store.pd.256 intrinsic with a variable i8 mask.
; Both check blocks expect the mask in %k1, one vcompresspd of %ymm0 to memory
; under {%k1}, and a vzeroupper before returning.
define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
ret void
}
; x86 masked compress-store intrinsic (returns void); operands are an i8* address, a <4 x double> vector and an i8 mask.
declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_pd_256:
; X86: # %bb.0:
@ -790,45 +416,6 @@ define <4 x double> @test_compress_pd_256(<4 x double> %data) {
; x86 masked compress intrinsic (vector operands only, no pointer) returning <4 x double>; operands are two <4 x double> vectors and an i8 mask.
declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
; Compress-store of <4 x double> with a constant all-ones mask (i8 -1). Both
; check blocks expect a plain unmasked vmovups store of %ymm0 followed by
; vzeroupper, with no mask-register setup and no vcompresspd.
define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_compress_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
ret void
}
; Masked compress-store of <8 x float> through the x86-specific
; @llvm.x86.avx512.mask.compress.store.ps.256 intrinsic with a variable i8 mask.
; Both check blocks expect the mask in %k1, one vcompressps of %ymm0 to memory
; under {%k1}, and a vzeroupper before returning.
define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
ret void
}
; x86 masked compress-store intrinsic (returns void); operands are an i8* address, an <8 x float> vector and an i8 mask.
declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_ps_256:
; X86: # %bb.0:
@ -875,45 +462,6 @@ define <8 x float> @test_compress_ps_256(<8 x float> %data) {
; x86 masked compress intrinsic (vector operands only, no pointer) returning <8 x float>; operands are two <8 x float> vectors and an i8 mask.
declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
; Compress-store of <8 x float> with a constant all-ones mask (i8 -1). Both
; check blocks expect a plain unmasked vmovups store of %ymm0 followed by
; vzeroupper, with no mask-register setup and no vcompressps.
define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_compress_store_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
ret void
}
; Masked compress-store of <4 x i64> through the x86-specific
; @llvm.x86.avx512.mask.compress.store.q.256 intrinsic with a variable i8 mask.
; Both check blocks expect the mask in %k1, one vpcompressq of %ymm0 to memory
; under {%k1}, and a vzeroupper before returning.
define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
ret void
}
; x86 masked compress-store intrinsic (returns void); operands are an i8* address, a <4 x i64> vector and an i8 mask.
declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_256:
; X86: # %bb.0:
@ -960,45 +508,6 @@ define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
; x86 masked compress intrinsic (vector operands only, no pointer) returning <4 x i64>; operands are two <4 x i64> vectors and an i8 mask.
declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
; Compress-store of <4 x i64> with a constant all-ones mask (i8 -1). Both check
; blocks expect a plain unmasked vmovups store of %ymm0 followed by vzeroupper,
; with no mask-register setup and no vpcompressq.
define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_compress_store_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
ret void
}
; Masked compress-store of <8 x i32> through the x86-specific
; @llvm.x86.avx512.mask.compress.store.d.256 intrinsic with a variable i8 mask.
; Both check blocks expect the mask in %k1, one vpcompressd of %ymm0 to memory
; under {%k1}, and a vzeroupper before returning.
define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_d_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
ret void
}
; x86 masked compress-store intrinsic (returns void); operands are an i8* address, an <8 x i32> vector and an i8 mask.
declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_d_256:
; X86: # %bb.0:
@ -1045,61 +554,6 @@ define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
; x86 masked compress intrinsic (vector operands only, no pointer) returning <8 x i32>; operands are two <8 x i32> vectors and an i8 mask.
declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
; Compress-store of <8 x i32> with a constant all-ones mask (i8 -1). Both check
; blocks expect a plain unmasked vmovups store of %ymm0 followed by vzeroupper,
; with no mask-register setup and no vpcompressd.
define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_compress_store_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
ret void
}
; Masked expand-load of <4 x double> through the x86-specific
; @llvm.x86.avx512.mask.expand.load.pd.256 intrinsic, with %data as the vector
; operand and a variable i8 %mask. Both check blocks expect the mask in %k1 and
; one merge-masked (no {z}) vexpandpd from memory into %ymm0.
define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
ret <4 x double> %res
}
; Zero-masking variant of the <4 x double> expand-load test: the vector operand
; of @llvm.x86.avx512.mask.expand.load.pd.256 is zeroinitializer and the mask is
; a variable i8. Both check blocks expect vexpandpd from memory with {%k1} {z}.
define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
ret <4 x double> %res
}
; x86 masked expand-load intrinsic returning <4 x double>; operands are an i8* address, a <4 x double> vector and an i8 mask.
declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
define <4 x double> @test_expand_pd_256(<4 x double> %data) {
; CHECK-LABEL: test_expand_pd_256:
; CHECK: # %bb.0:
@ -1146,59 +600,6 @@ define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
; x86 masked expand intrinsic (vector operands only, no pointer) returning <4 x double>; operands are two <4 x double> vectors and an i8 mask.
declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
; Expand-load of <4 x double> with a constant all-ones mask (i8 -1). Both check
; blocks expect a plain unmasked vmovups load into %ymm0, with no mask-register
; setup and no vexpandpd.
define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_expand_load_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
ret <4 x double> %res
}
; Masked expand-load of <8 x float> through the x86-specific
; @llvm.x86.avx512.mask.expand.load.ps.256 intrinsic, with %data as the vector
; operand and a variable i8 %mask. Both check blocks expect the mask in %k1 and
; one merge-masked (no {z}) vexpandps from memory into %ymm0.
define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
ret <8 x float> %res
}
; Zero-masking variant of the <8 x float> expand-load test: the vector operand
; of @llvm.x86.avx512.mask.expand.load.ps.256 is zeroinitializer and the mask is
; a variable i8. Both check blocks expect vexpandps from memory with {%k1} {z}.
define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
ret <8 x float> %res
}
; x86 masked expand-load intrinsic returning <8 x float>; operands are an i8* address, an <8 x float> vector and an i8 mask.
declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
define <8 x float> @test_expand_ps_256(<8 x float> %data) {
; CHECK-LABEL: test_expand_ps_256:
; CHECK: # %bb.0:
@ -1245,59 +646,6 @@ define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
; x86 masked expand intrinsic (vector operands only, no pointer) returning <8 x float>; operands are two <8 x float> vectors and an i8 mask.
declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
; Expand-load of <8 x float> with a constant all-ones mask (i8 -1). Both check
; blocks expect a plain unmasked vmovups load into %ymm0, with no mask-register
; setup and no vexpandps.
define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
ret <8 x float> %res
}
; Masked expand-load of <4 x i64> through the x86-specific
; @llvm.x86.avx512.mask.expand.load.q.256 intrinsic, with %data as the vector
; operand and a variable i8 %mask. Both check blocks expect the mask in %k1 and
; one merge-masked (no {z}) vpexpandq from memory into %ymm0.
define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
ret <4 x i64> %res
}
; Zero-masking variant of the <4 x i64> expand-load test: the vector operand of
; @llvm.x86.avx512.mask.expand.load.q.256 is zeroinitializer and the mask is a
; variable i8. Both check blocks expect vpexpandq from memory with {%k1} {z}.
define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
ret <4 x i64> %res
}
; x86 masked expand-load intrinsic returning <4 x i64>; operands are an i8* address, a <4 x i64> vector and an i8 mask.
declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
; CHECK-LABEL: test_expand_q_256:
; CHECK: # %bb.0:
@ -1344,59 +692,6 @@ define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
; x86 masked expand intrinsic (vector operands only, no pointer) returning <4 x i64>; operands are two <4 x i64> vectors and an i8 mask.
declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
; Expand-load of <4 x i64> with a constant all-ones mask (i8 -1). Both check
; blocks expect a plain unmasked vmovups load into %ymm0, with no mask-register
; setup and no vpexpandq.
define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
ret <4 x i64> %res
}
; Masked expand-load of <8 x i32> through the x86-specific
; @llvm.x86.avx512.mask.expand.load.d.256 intrinsic, with %data as the vector
; operand and a variable i8 %mask. Both check blocks expect the mask in %k1 and
; one merge-masked (no {z}) vpexpandd from memory into %ymm0.
define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
ret <8 x i32> %res
}
; Zero-masking variant of the <8 x i32> expand-load test: the vector operand of
; @llvm.x86.avx512.mask.expand.load.d.256 is zeroinitializer and the mask is a
; variable i8. Both check blocks expect vpexpandd from memory with {%k1} {z}.
define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
ret <8 x i32> %res
}
; x86 masked expand-load intrinsic returning <8 x i32>; operands are an i8* address, an <8 x i32> vector and an i8 mask.
declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
; CHECK-LABEL: test_expand_d_256:
; CHECK: # %bb.0:
@ -1443,21 +738,6 @@ define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
; x86 masked expand intrinsic (vector operands only, no pointer) returning <8 x i32>; operands are two <8 x i32> vectors and an i8 mask.
declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
; Expand-load of <8 x i32> with a constant all-ones mask (i8 -1). Both check
; blocks expect a plain unmasked vmovups load into %ymm0, with no mask-register
; setup and no vpexpandd.
define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
ret <8 x i32> %res
}
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_cmpps_256:
; CHECK: # %bb.0: