From 358b09497165f7b77dfa7fd459eb5c1b70d1d897 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 May 2018 23:15:00 +0000 Subject: [PATCH] [X86] Remove 128/256-bit cvtdq2ps, cvtudq2ps, cvtqq2pd, cvtuqq2pd intrinsics. These can all be implemented with sitofp/uitofp instructions. llvm-svn: 332916 --- llvm/include/llvm/IR/IntrinsicsX86.td | 40 ---- llvm/lib/IR/AutoUpgrade.cpp | 57 +++-- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 14 -- .../CodeGen/X86/avx-intrinsics-fast-isel.ll | 3 +- .../CodeGen/X86/avx-intrinsics-x86-upgrade.ll | 11 + llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 16 -- .../X86/avx512dqvl-intrinsics-fast-isel.ll | 225 ++++++++++++++++++ .../X86/avx512dqvl-intrinsics-upgrade.ll | 64 +++++ .../test/CodeGen/X86/avx512dqvl-intrinsics.ll | 64 ----- .../X86/avx512vl-intrinsics-fast-isel.ll | 75 +++--- .../X86/avx512vl-intrinsics-upgrade.ll | 32 +++ llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 32 --- .../CodeGen/X86/sse2-intrinsics-fast-isel.ll | 3 +- .../X86/sse2-intrinsics-x86-upgrade.ll | 11 + llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 45 +--- 15 files changed, 431 insertions(+), 261 deletions(-) create mode 100644 llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 510cd471c576..f3357514b2f8 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -465,8 +465,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Conversion ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">, @@ -1431,8 +1429,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">, @@ -3833,18 +3829,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtqq2pd_128 : - GCCBuiltin<"__builtin_ia32_cvtqq2pd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtqq2pd_256 : - GCCBuiltin<"__builtin_ia32_cvtqq2pd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvtqq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">, Intrinsic<[llvm_v8f64_ty], @@ -3995,36 +3979,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtudq2ps_128 : - GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtudq2ps_256 : - GCCBuiltin<"__builtin_ia32_cvtudq2ps256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvtudq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtuqq2pd_128 : - GCCBuiltin<"__builtin_ia32_cvtuqq2pd128_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_cvtuqq2pd_256 : - GCCBuiltin<"__builtin_ia32_cvtuqq2pd256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_cvtuqq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">, Intrinsic<[llvm_v8f64_ty], diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 85202cf7ba4f..ba2f3fa92489 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -172,6 +172,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.pmull.") || // Added in 4.0 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 + Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0 + Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0 + Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0 + Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0 + Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0 + Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0 Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0 Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0 @@ -265,8 +271,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name == "sse2.cvtsi642sd" || // Added in 7.0 Name == "sse2.cvtss2sd" || // Added in 7.0 Name == "sse2.cvtdq2pd" || // Added in 3.9 + Name == "sse2.cvtdq2ps" || // Added in 7.0 Name == "sse2.cvtps2pd" || // Added in 3.9 Name == "avx.cvtdq2.pd.256" || // Added in 3.9 + Name == "avx.cvtdq2.ps.256" || // Added in 7.0 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 Name.startswith("avx.vinsertf128.") || // Added in 3.7 Name == "avx2.vinserti128" || // Added in 3.7 @@ -1195,10 +1203,6 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, IID = Intrinsic::x86_avx512_vpermilvar_pd_512; else llvm_unreachable("Unexpected intrinsic"); - } else if (Name == "cvtdq2ps.128") { - IID = Intrinsic::x86_sse2_cvtdq2ps; - } else if (Name == "cvtdq2ps.256") { - IID = Intrinsic::x86_avx_cvtdq2_ps_256; } else if (Name == "cvtpd2dq.256") { IID = Intrinsic::x86_avx_cvt_pd2dq_256; } else if (Name == "cvtpd2ps.256") { @@ -1607,36 +1611,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType()); Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); } else if (IsX86 && (Name == "sse2.cvtdq2pd" || - Name == "sse2.cvtps2pd" || + Name == "sse2.cvtdq2ps" || Name == "avx.cvtdq2.pd.256" || + Name == "avx.cvtdq2.ps.256" || + Name.startswith("avx512.mask.cvtdq2pd.") || + 
Name.startswith("avx512.mask.cvtudq2pd.") || + Name == "avx512.mask.cvtdq2ps.128" || + Name == "avx512.mask.cvtdq2ps.256" || + Name == "avx512.mask.cvtudq2ps.128" || + Name == "avx512.mask.cvtudq2ps.256" || + Name == "avx512.mask.cvtqq2pd.128" || + Name == "avx512.mask.cvtqq2pd.256" || + Name == "avx512.mask.cvtuqq2pd.128" || + Name == "avx512.mask.cvtuqq2pd.256" || + Name == "sse2.cvtps2pd" || Name == "avx.cvt.ps2.pd.256" || Name == "avx512.mask.cvtps2pd.128" || - Name == "avx512.mask.cvtps2pd.256" || - Name.startswith("avx512.mask.cvtdq2pd.") || - Name.startswith("avx512.mask.cvtudq2pd."))) { - // Lossless i32/float to double conversion. - // Extract the bottom elements if necessary and convert to double vector. - Value *Src = CI->getArgOperand(0); - VectorType *SrcTy = cast(Src->getType()); - VectorType *DstTy = cast(CI->getType()); + Name == "avx512.mask.cvtps2pd.256")) { + Type *DstTy = CI->getType(); Rep = CI->getArgOperand(0); - unsigned NumDstElts = DstTy->getNumElements(); - if (NumDstElts < SrcTy->getNumElements()) { + unsigned NumDstElts = DstTy->getVectorNumElements(); + if (NumDstElts < Rep->getType()->getVectorNumElements()) { assert(NumDstElts == 2 && "Unexpected vector size"); uint32_t ShuffleMask[2] = { 0, 1 }; - Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), - ShuffleMask); + Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); } - bool SInt2Double = (StringRef::npos != Name.find("cvtdq2")); - bool UInt2Double = (StringRef::npos != Name.find("cvtudq2")); - if (SInt2Double) - Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd"); - else if (UInt2Double) - Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd"); - else + bool IsPS2PD = (StringRef::npos != Name.find("ps2")); + bool IsUnsigned = (StringRef::npos != Name.find("cvtu")); + if (IsPS2PD) Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); + else if (IsUnsigned) + Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt"); + else + Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt"); if (CI->getNumArgOperands() == 3) Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 6a91b29cb7a8..dca513bf7f84 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -375,7 +375,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), - X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), @@ -589,10 +588,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTP2UI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK, X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_128, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_256, INTR_TYPE_1OP_MASK, - ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, INTR_TYPE_1OP_MASK, @@ -647,16 +642,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { ISD::FP_TO_UINT, 0), 
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK, ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, 0), X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK, ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND), - X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_128, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_256, INTR_TYPE_1OP_MASK, - ISD::UINT_TO_FP, 0), X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK, ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, INTR_TYPE_1OP_MASK, @@ -1514,7 +1501,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE), X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT), X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE), - X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(sse2_cvtpd2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0), X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index 0ad827225690..0e445ba3a4e7 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -597,10 +597,9 @@ define <8 x float> @test_mm256_cvtepi32_ps(<4 x i64> %a0) nounwind { ; X64-NEXT: vcvtdq2ps %ymm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <4 x i64> %a0 to <8 x i32> - %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %arg0) + %res = sitofp <8 x i32> %arg0 to <8 x float> ret <8 x float> %res } -declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone define <2 x i64> @test_mm256_cvtpd_epi32(<4 x double> %a0) nounwind { ; X32-LABEL: test_mm256_cvtpd_epi32: diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll index 1bf7bb360e1e..38654f241a3d 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -610,3 +610,14 @@ define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { ret <8 x i32> %res } declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { +; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 6c18efd2edfb..e26c125d283b 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -193,22 +193,6 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone -define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { -; AVX-LABEL: test_x86_avx_cvtdq2_ps_256: -; AVX: # %bb.0: -; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 # 
encoding: [0xc5,0xfc,0x5b,0xc0] -; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0] -; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] - ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone - - define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) { ; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256: ; AVX: # %bb.0: diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll new file mode 100644 index 000000000000..335c572ebf22 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll @@ -0,0 +1,225 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X32 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X64 + +; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vldq-builtins.c + +define <2 x double> @test_mm_cvtepi64_pd(<2 x i64> %__A) { +; X32-LABEL: test_mm_cvtepi64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: vcvtqq2pd %xmm0, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm_cvtepi64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: vcvtqq2pd %xmm0, %xmm0 +; X64-NEXT: retq +entry: + %conv.i = sitofp <2 x i64> %__A to <2 x double> + ret <2 x double> %conv.i +} + +define <2 x double> @test_mm_mask_cvtepi64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) { +; X32-LABEL: test_mm_mask_cvtepi64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtqq2pd %xmm1, %xmm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_mask_cvtepi64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtqq2pd %xmm1, %xmm0 {%k1} +; X64-NEXT: retq +entry: + %conv.i.i = sitofp <2 x i64> %__A to <2 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W + ret <2 x double> %1 +} + +define <2 x double> @test_mm_maskz_cvtepi64_pd(i8 zeroext %__U, <2 x i64> %__A) { +; X32-LABEL: test_mm_maskz_cvtepi64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtqq2pd %xmm0, %xmm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_maskz_cvtepi64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtqq2pd %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: retq +entry: + %conv.i.i = sitofp <2 x i64> %__A to <2 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer + ret <2 x double> %1 +} + +define <4 x double> @test_mm256_cvtepi64_pd(<4 x i64> %__A) { +; X32-LABEL: test_mm256_cvtepi64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: vcvtqq2pd %ymm0, %ymm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_cvtepi64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: vcvtqq2pd %ymm0, %ymm0 +; X64-NEXT: retq +entry: + %conv.i = sitofp <4 x i64> %__A to <4 x double> + ret <4 x double> %conv.i +} + +define <4 x double>
@test_mm256_mask_cvtepi64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) { +; X32-LABEL: test_mm256_mask_cvtepi64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtqq2pd %ymm1, %ymm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_mask_cvtepi64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtqq2pd %ymm1, %ymm0 {%k1} +; X64-NEXT: retq +entry: + %conv.i.i = sitofp <4 x i64> %__A to <4 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W + ret <4 x double> %1 +} + +define <4 x double> @test_mm256_maskz_cvtepi64_pd(i8 zeroext %__U, <4 x i64> %__A) { +; X32-LABEL: test_mm256_maskz_cvtepi64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtqq2pd %ymm0, %ymm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_maskz_cvtepi64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtqq2pd %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: retq +entry: + %conv.i.i = sitofp <4 x i64> %__A to <4 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer + ret <4 x double> %1 +} + +define <2 x double> @test_mm_cvtepu64_pd(<2 x i64> %__A) { +; X32-LABEL: test_mm_cvtepu64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: vcvtuqq2pd %xmm0, %xmm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm_cvtepu64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: vcvtuqq2pd %xmm0, %xmm0 +; X64-NEXT: retq +entry: + %conv.i = uitofp <2 x i64> %__A to <2 x double> + ret <2 x double> %conv.i +} + +define <2 x double> @test_mm_mask_cvtepu64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) { +; X32-LABEL: test_mm_mask_cvtepu64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtuqq2pd %xmm1, %xmm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_mask_cvtepu64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtuqq2pd %xmm1, %xmm0 {%k1} +; X64-NEXT: retq +entry: + %conv.i.i = uitofp <2 x i64> %__A to <2 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W + ret <2 x double> %1 +} + +define <2 x double> @test_mm_maskz_cvtepu64_pd(i8 zeroext %__U, <2 x i64> %__A) { +; X32-LABEL: test_mm_maskz_cvtepu64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtuqq2pd %xmm0, %xmm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_maskz_cvtepu64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtuqq2pd %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: retq +entry: + %conv.i.i = uitofp <2 x i64> %__A to <2 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer + ret <2 x double> %1 +} + +define <4 x double> @test_mm256_cvtepu64_pd(<4 x i64> %__A) { +; X32-LABEL: test_mm256_cvtepu64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: vcvtuqq2pd %ymm0, %ymm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_cvtepu64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: vcvtuqq2pd %ymm0, %ymm0 +; X64-NEXT: retq +entry: + %conv.i = uitofp <4 x i64> %__A to <4 x double> + ret <4 x double> %conv.i
+} + +define <4 x double> @test_mm256_mask_cvtepu64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) { +; X32-LABEL: test_mm256_mask_cvtepu64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtuqq2pd %ymm1, %ymm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_mask_cvtepu64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtuqq2pd %ymm1, %ymm0 {%k1} +; X64-NEXT: retq +entry: + %conv.i.i = uitofp <4 x i64> %__A to <4 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W + ret <4 x double> %1 +} + +define <4 x double> @test_mm256_maskz_cvtepu64_pd(i8 zeroext %__U, <4 x i64> %__A) { +; X32-LABEL: test_mm256_maskz_cvtepu64_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vcvtuqq2pd %ymm0, %ymm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_maskz_cvtepu64_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vcvtuqq2pd %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: retq +entry: + %conv.i.i = uitofp <4 x i64> %__A to <4 x double> + %0 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer + ret <4 x double> %1 +} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll index 84c31f2f1f86..e006bd9db1b6 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -1855,3 +1855,67 @@ define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) { %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0) ret i8 %res } + +declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8] +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) + %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8] +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) + %res1 = call <4 x
double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8] +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) + %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8] +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) + %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 580fb60d1bba..8d8fad64edc0 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -129,38 +129,6 @@ define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i ret <4 x i64> %res2 } -declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8) - -define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8] -; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) - %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) - %res2 = fadd <2 x double> %res, %res1 - ret <2 x double> %res2 -} - -declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8) - -define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) { -; CHECK-LABEL: 
test_int_x86_avx512_mask_cvt_qq2pd_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8] -; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) - %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) - %res2 = fadd <4 x double> %res, %res1 - ret <4 x double> %res2 -} - declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8) define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) { @@ -307,38 +275,6 @@ define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i ret <4 x i64> %res2 } -declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8) - -define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8] -; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) - %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) - %res2 = fadd <2 x double> %res, %res1 - ret <2 x double> %res2 -} - -declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8) - -define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8] -; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) - %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) - %res2 = fadd <4 x double> %res, %res1 - ret <4 x double> %res2 -} - declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8) define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) { diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll index b36388272021..586dac8172cc 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -19,11 +19,11 @@ define <4 x float> @test_mm_mask_cvtepi32_ps(<4 x float> %__W, i8 zeroext %__U, ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x float> 
@llvm.x86.sse2.cvtdq2ps(<4 x i32> %0) #8 - %2 = bitcast i8 %__U to <8 x i1> - %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - %3 = select <4 x i1> %extract.i, <4 x float> %1, <4 x float> %__W - ret <4 x float> %3 + %conv.i.i = sitofp <4 x i32> %0 to <4 x float> + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> %__W + ret <4 x float> %2 } define <4 x float> @test_mm_maskz_cvtepi32_ps(i16 zeroext %__U, <2 x i64> %__A) { @@ -41,11 +41,12 @@ define <4 x float> @test_mm_maskz_cvtepi32_ps(i16 zeroext %__U, <2 x i64> %__A) entry: %conv.i = trunc i16 %__U to i8 %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %0) #8 - %2 = bitcast i8 %conv.i to <8 x i1> - %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - %3 = select <4 x i1> %extract.i, <4 x float> %1, <4 x float> zeroinitializer - ret <4 x float> %3 + %conv.i.i = sitofp <4 x i32> %0 to <4 x float> + %1 = bitcast i8 %conv.i to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> zeroinitializer + ret <4 x float> %2 + } define <8 x float> @test_mm256_mask_cvtepi32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) { @@ -63,10 +64,10 @@ define <8 x float> @test_mm256_mask_cvtepi32_ps(<8 x float> %__W, i8 zeroext %__ ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %0) #8 - %2 = bitcast i8 %__U to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %__W - ret <8 x float> %3 + %conv.i.i = sitofp <8 x i32> %0 to <8 x float> + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %conv.i.i, <8 x float> %__W + ret <8 x float> %2 } define <8 x float> @test_mm256_maskz_cvtepi32_ps(i16 zeroext %__U, <4 x i64> %__A) { @@ -84,10 +85,10 @@ define <8 x float> @test_mm256_maskz_cvtepi32_ps(i16 zeroext %__U, <4 x i64> %__ entry: %conv.i = trunc i16 %__U to i8 %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %0) #8 - %2 = bitcast i8 %conv.i to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer - ret <8 x float> %3 + %conv.i.i = sitofp <8 x i32> %0 to <8 x float> + %1 = bitcast i8 %conv.i to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %conv.i.i, <8 x float> zeroinitializer + ret <8 x float> %2 } define <2 x i64> @test_mm_mask_cvtpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) { @@ -1197,8 +1198,8 @@ define <4 x float> @test_mm_cvtepu32_ps(<2 x i64> %__A) { ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %0, <4 x float> zeroinitializer, i8 -1) #8 - ret <4 x float> %1 + %conv.i = uitofp <4 x i32> %0 to <4 x float> + ret <4 x float> %conv.i } define <4 x float> @test_mm_mask_cvtepu32_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) { @@ -1216,8 +1217,11 @@ define <4 x float> @test_mm_mask_cvtepu32_ps(<4 x float> %__W, i8 zeroext %__U, ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %0, <4 x float> %__W, i8 %__U) #8 - ret <4 x float> %1 + %conv.i.i = uitofp <4 x i32> %0 to <4 x float> + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8
x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> %__W + ret <4 x float> %2 } define <4 x float> @test_mm_maskz_cvtepu32_ps(i8 zeroext %__U, <2 x i64> %__A) { @@ -1235,8 +1239,11 @@ define <4 x float> @test_mm_maskz_cvtepu32_ps(i8 zeroext %__U, <2 x i64> %__A) { ; X64-NEXT: retq entry: %0 = bitcast <2 x i64> %__A to <4 x i32> - %1 = tail call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %0, <4 x float> zeroinitializer, i8 %__U) #8 - ret <4 x float> %1 + %conv.i.i = uitofp <4 x i32> %0 to <4 x float> + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x float> %conv.i.i, <4 x float> zeroinitializer + ret <4 x float> %2 } define <8 x float> @test_mm256_cvtepu32_ps(<4 x i64> %__A) { @@ -1251,8 +1258,8 @@ define <8 x float> @test_mm256_cvtepu32_ps(<4 x i64> %__A) { ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %0, <8 x float> zeroinitializer, i8 -1) #8 - ret <8 x float> %1 + %conv.i = uitofp <8 x i32> %0 to <8 x float> + ret <8 x float> %conv.i } define <8 x float> @test_mm256_mask_cvtepu32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) { @@ -1270,8 +1277,10 @@ define <8 x float> @test_mm256_mask_cvtepu32_ps(<8 x float> %__W, i8 zeroext %__ ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %0, <8 x float> %__W, i8 %__U) #8 - ret <8 x float> %1 + %conv.i.i = uitofp <8 x i32> %0 to <8 x float> + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %conv.i.i, <8 x float> %__W + ret <8 x float> %2 } define <8 x float> @test_mm256_maskz_cvtepu32_ps(i8 zeroext %__U, <4 x i64> %__A) { @@ -1289,8 +1298,10 @@ define <8 x float> @test_mm256_maskz_cvtepu32_ps(i8 zeroext %__U, <4 x i64> %__A ; X64-NEXT: retq entry: %0 = bitcast <4 x i64> %__A to <8 x i32> - %1 = tail call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %0, <8 x float> zeroinitializer, i8 %__U) #8 - ret <8 x float> %1 + %conv.i.i = uitofp <8 x i32> %0 to <8 x float> + %1 = bitcast i8 %conv.i to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %conv.i.i, <8 x float> zeroinitializer + ret <8 x float> %2 } define <8 x float> @test_mm256_shuffle_f32x4(<8 x float> %__A, <8 x float> %__B) { @@ -3793,8 +3804,6 @@ entry: ret <4 x i64> %2 } -declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) -declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8) declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8) @@ -3813,8 +3822,6 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8) declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8) -declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8) -declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8) declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8) !0 = !{i32 1} diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index c686e5a47966..17a319d4649f 100644 --- 
a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -6953,3 +6953,35 @@ define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i6 %res2 = add <4 x i64> %res, %res1 ret <4 x i64> %res2 } + +declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128: +; CHECK: ## %bb.0: +; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8] +; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) + %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256: +; CHECK: ## %bb.0: +; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8] +; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) + %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 7a0b66f30769..332c505fec80 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2058,38 +2058,6 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x ret <8 x i32> %res2 } -declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8) - -define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8] -; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) - %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) - %res2 = fadd <4 x float> %res, %res1 - ret <4 x float> %res2 -} - -declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8) - -define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x 
i32> %x0, <8 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8] -; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xc0] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) - %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) - %res2 = fadd <8 x float> %res, %res1 - ret <8 x float> %res2 -} - declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8) define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index df92c47a7b71..b7f91063fe2c 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -1114,10 +1114,9 @@ define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind { ; X64-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> - %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0) + %res = sitofp <4 x i32> %arg0 to <4 x float> ret <4 x float> %res } -declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind { ; X32-LABEL: test_mm_cvtpd_epi32: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index 7d1a25b0ac46..5038b04ac4f3 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -305,3 +305,14 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } + + +define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { +; CHECK-LABEL: test_x86_sse2_cvtdq2ps: +; CHECK: ## %bb.0: +; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0 +; CHECK-NEXT: retl + %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index 085649a530a8..7643a6eff7fa 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -209,27 +209,6 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone -define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { -; SSE-LABEL: test_x86_sse2_cvtdq2ps: -; SSE: ## %bb.0: -; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0] -; SSE-NEXT: retl ## encoding: [0xc3] -; -; AVX2-LABEL: test_x86_sse2_cvtdq2ps: -; AVX2: ## %bb.0: -; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] -; -; SKX-LABEL: test_x86_sse2_cvtdq2ps: -; SKX: ## %bb.0: -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x5b,0xc0] -; SKX-NEXT: retl ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone - - define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { ; SSE-LABEL: test_x86_sse2_cvtpd2dq: ; SSE: ## %bb.0: @@ -694,21 +673,21 @@ define <8 x i16> @test_x86_sse2_packssdw_128_fold() { ; SSE: ## %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768] ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] -; SSE-NEXT: ## fixup A - offset: 3, value: LCPI31_0, kind: FK_Data_4 +; SSE-NEXT: ## fixup A - offset: 3, value: LCPI30_0, kind: FK_Data_4 ; SSE-NEXT: retl ## encoding: [0xc3] ; ; AVX2-LABEL: test_x86_sse2_packssdw_128_fold: ; AVX2: ## %bb.0: ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768] ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI31_0, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI30_0, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; SKX-LABEL: test_x86_sse2_packssdw_128_fold: ; SKX: ## %bb.0: -; SKX-NEXT: vmovaps LCPI31_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768] +; SKX-NEXT: vmovaps LCPI30_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768] ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; SKX-NEXT: ## fixup A - offset: 4, value: LCPI31_0, kind: FK_Data_4 +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI30_0, kind: FK_Data_4 ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> ) ret <8 x i16> %res @@ -741,21 +720,21 @@ define <16 x i8> @test_x86_sse2_packsswb_128_fold() { ; SSE: ## %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] -; SSE-NEXT: ## fixup A - offset: 3, value: LCPI33_0, kind: FK_Data_4 +; SSE-NEXT: ## fixup A - offset: 3, value: LCPI32_0, kind: FK_Data_4 ; SSE-NEXT: retl ## encoding: [0xc3] ; ; AVX2-LABEL: test_x86_sse2_packsswb_128_fold: ; AVX2: ## %bb.0: ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; SKX-LABEL: test_x86_sse2_packsswb_128_fold: ; SKX: ## %bb.0: -; SKX-NEXT: vmovaps LCPI33_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] +; SKX-NEXT: vmovaps LCPI32_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0] ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; SKX-NEXT: ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4 +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4 ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> , <8 x i16> zeroinitializer) ret <16 x i8> %res @@ -788,21 +767,21 @@ define <16 x i8> @test_x86_sse2_packuswb_128_fold() { ; SSE: ## %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A] -; SSE-NEXT: ## fixup A - offset: 3, value: LCPI35_0, kind: FK_Data_4 +; SSE-NEXT: ## fixup A - offset: 3, value: LCPI34_0, kind: 
FK_Data_4 ; SSE-NEXT: retl ## encoding: [0xc3] ; ; AVX2-LABEL: test_x86_sse2_packuswb_128_fold: ; AVX2: ## %bb.0: ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; SKX-LABEL: test_x86_sse2_packuswb_128_fold: ; SKX: ## %bb.0: -; SKX-NEXT: vmovaps LCPI35_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] +; SKX-NEXT: vmovaps LCPI34_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0] ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] -; SKX-NEXT: ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4 +; SKX-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4 ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> zeroinitializer) ret <16 x i8> %res
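
For reference, a minimal IR sketch of the upgrade this patch performs; value names such as %a, %mask and %passthru are illustrative, not taken from the patch. The unmasked intrinsics now upgrade to a bare conversion instruction:

  ; old bitcode:  %r = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a)
  ; upgraded form emitted by AutoUpgrade:
  %r = sitofp <4 x i32> %a to <4 x float>

The masked 128/256-bit forms (e.g. llvm.x86.avx512.mask.cvtudq2ps.128) take the same path and then go through EmitX86Select, which produces the usual extract-low-mask-bits-and-select idiom seen in the fast-isel tests above:

  %cvt = uitofp <4 x i32> %a to <4 x float>
  %m = bitcast i8 %mask to <8 x i1>
  %low = shufflevector <8 x i1> %m, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r2 = select <4 x i1> %low, <4 x float> %cvt, <4 x float> %passthru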