[Tests] Regen to remove future test diffs

This file appears to have been manually edited at some point after being auto-updated. A future change adjusts this file slightly, and all of the updates make the diff super confusing.

llvm-svn: 352453
This commit is contained in:
Philip Reames 2019-01-29 06:34:46 +00:00
parent 3846b9b443
commit 2ddf96db50
1 changed files with 152 additions and 128 deletions

View File

@ -21,8 +21,9 @@ define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_add_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -52,8 +53,9 @@ define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_add_ss_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -96,8 +98,9 @@ define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_add_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
@ -123,8 +126,9 @@ define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_add_sd_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
@ -163,8 +167,9 @@ define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_sub_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -194,8 +199,9 @@ define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_sub_ss_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -238,8 +244,9 @@ define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_sub_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
@ -265,8 +272,9 @@ define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_sub_sd_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
@ -305,8 +313,9 @@ define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_mul_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -336,8 +345,9 @@ define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mul_ss_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -380,8 +390,9 @@ define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_mul_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
@ -407,8 +418,9 @@ define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mul_sd_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
@ -447,8 +459,9 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_div_ss_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -478,8 +491,9 @@ define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float>
define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_div_ss_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -522,8 +536,9 @@ define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_div_sd_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
@ -549,8 +564,9 @@ define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou
define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_div_sd_mask_round(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
@ -574,8 +590,9 @@ declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>,
define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_max_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -586,8 +603,9 @@ define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_max_ss_mask(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -617,8 +635,9 @@ declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x doubl
define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_max_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
@ -627,8 +646,9 @@ define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_max_sd_mask(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
@ -652,8 +672,9 @@ declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>,
define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_min_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -664,8 +685,9 @@ define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_min_ss_mask(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <4 x float> [[TMP2]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -695,8 +717,9 @@ declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x doubl
define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_min_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
@ -705,8 +728,9 @@ define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_min_sd_mask(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT: ret <2 x double> [[TMP2]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
@ -1551,15 +1575,15 @@ define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x fl
; CHECK-LABEL: @test_mask3_vfmsub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP9]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP10]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -1584,14 +1608,14 @@ define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float>
; CHECK-LABEL: @test_mask3_vfmsub_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
; CHECK-NEXT: ret float [[TMP8]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
; CHECK-NEXT: ret float [[TMP9]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -1635,15 +1659,15 @@ define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x
; CHECK-LABEL: @test_mask3_vfmsub_sd(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP9]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP10]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
@ -1664,14 +1688,14 @@ define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou
; CHECK-LABEL: @test_mask3_vfmsub_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
; CHECK-NEXT: ret double [[TMP8]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
; CHECK-NEXT: ret double [[TMP9]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
@ -1709,18 +1733,18 @@ define double @test_mask3_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_ss(
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS1]]
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP9]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP11]]
;
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -1744,17 +1768,17 @@ define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x f
define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_ss_0(
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS1]]
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]]
; CHECK-NEXT: ret float [[TMP8]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
; CHECK-NEXT: ret float [[TMP10]]
;
%1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
@ -1798,18 +1822,18 @@ define float @test_mask3_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float
define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_sd(
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS1]]
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP9]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0
; CHECK-NEXT: ret <2 x double> [[TMP11]]
;
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
%2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
@ -1829,17 +1853,17 @@ define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2
define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfnmsub_sd_0(
; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = fsub double -0.000000e+00, [[DOTRHS]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS1]]
; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]]
; CHECK-NEXT: ret double [[TMP8]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
; CHECK-NEXT: ret double [[TMP10]]
;
%1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a