diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll index ff78142493e1..564f8644e393 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll @@ -21,8 +21,9 @@ define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_add_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -52,8 +53,9 @@ define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_add_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -96,8 +98,9 @@ define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) { define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_add_sd_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8) @@ -123,8 +126,9 @@ define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_add_sd_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8) @@ -163,8 +167,9 @@ define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_sub_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -194,8 +199,9 @@ define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_sub_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -238,8 +244,9 @@ define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) { define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_sub_sd_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8) @@ -265,8 +272,9 @@ define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_sub_sd_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8) @@ -305,8 +313,9 @@ define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_mul_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -336,8 +345,9 @@ define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mul_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -380,8 +390,9 @@ define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) { define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_mul_sd_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8) @@ -407,8 +418,9 @@ define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_mul_sd_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8) @@ -447,8 +459,9 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_div_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -478,8 +491,9 @@ define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_div_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -522,8 +536,9 @@ define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_div_sd_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8) @@ -549,8 +564,9 @@ define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x dou define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_div_sd_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8) @@ -574,8 +590,9 @@ declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_max_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -586,8 +603,9 @@ define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) { define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_max_ss_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -617,8 +635,9 @@ declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x doubl define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_max_sd( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 4) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4) @@ -627,8 +646,9 @@ define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) { define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_max_sd_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -652,8 +672,9 @@ declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>, define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_min_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -664,8 +685,9 @@ define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) { define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_min_ss_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -695,8 +717,9 @@ declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x doubl define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @test_min_sd( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[B:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x double> undef, i8 -1, i32 4) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4) @@ -705,8 +728,9 @@ define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) { define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_min_sd_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4) -; CHECK-NEXT: ret <2 x double> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[C:%.*]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[TMP1]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4) @@ -1551,15 +1575,15 @@ define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x fl ; CHECK-LABEL: @test_mask3_vfmsub_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP10]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1584,14 +1608,14 @@ define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> ; CHECK-LABEL: @test_mask3_vfmsub_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]] -; CHECK-NEXT: ret float [[TMP8]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]] +; CHECK-NEXT: ret float [[TMP9]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1635,15 +1659,15 @@ define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x ; CHECK-LABEL: @test_mask3_vfmsub_sd( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP9]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP10]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1664,14 +1688,14 @@ define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x dou ; CHECK-LABEL: @test_mask3_vfmsub_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]] -; CHECK-NEXT: ret double [[TMP8]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]] +; CHECK-NEXT: ret double [[TMP9]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = fsub <2 x double> , %1 @@ -1709,18 +1733,18 @@ define double @test_mask3_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mask3_vfnmsub_ss( -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS1]] -; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0 -; CHECK-NEXT: ret <4 x float> [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0 +; CHECK-NEXT: ret <4 x float> [[TMP11]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1744,17 +1768,17 @@ define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x f define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mask3_vfnmsub_ss_0( -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 -; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub float -0.000000e+00, [[DOTRHS1]] -; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP5]] -; CHECK-NEXT: ret float [[TMP8]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]] +; CHECK-NEXT: ret float [[TMP10]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1798,18 +1822,18 @@ define float @test_mask3_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_mask3_vfnmsub_sd( -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = fsub double -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 -; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS1]] -; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0 -; CHECK-NEXT: ret <2 x double> [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0 +; CHECK-NEXT: ret <2 x double> [[TMP11]] ; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 @@ -1829,17 +1853,17 @@ define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { ; CHECK-LABEL: @test_mask3_vfnmsub_sd_0( -; CHECK-NEXT: [[DOTRHS:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = fsub double -0.000000e+00, [[DOTRHS]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 -; CHECK-NEXT: [[DOTRHS1:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = fsub double -0.000000e+00, [[DOTRHS1]] -; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP5]] -; CHECK-NEXT: ret double [[TMP8]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]] +; CHECK-NEXT: ret double [[TMP10]] ; %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1 %2 = fsub <2 x double> , %a