[X86] Add comment string for broadcast loads from the constant pool.

Summary:
When broadcasting from the constant pool its useful to print out the final vector similar to what we do for normal moves from the constant pool.

I changed only a couple tests that were broadcast focused. One of them had been previously hand tweaked after running the script so that it could check the constant pool declaration. But I think this patch makes that unnecessary now since we can check the comment instead.

Reviewers: spatel, RKSimon, zvi

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D34923

llvm-svn: 307062
This commit is contained in:
Craig Topper 2017-07-04 05:46:11 +00:00
parent 49fc24a8bf
commit ad140cfb68
7 changed files with 1652 additions and 713 deletions

View File

@ -1332,6 +1332,32 @@ static std::string getShuffleComment(const MachineInstr *MI,
return Comment;
}
static void printConstant(const Constant *COp, raw_ostream &CS) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
if (CI->getBitWidth() <= 64) {
CS << CI->getZExtValue();
} else {
// print multi-word constant as (w0,w1)
const auto &Val = CI->getValue();
CS << "(";
for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
if (i > 0)
CS << ",";
CS << Val.getRawData()[i];
}
CS << ")";
}
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
SmallString<32> Str;
CF->getValueAPF().toString(Str);
CS << Str;
} else {
CS << "?";
}
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
@ -1766,59 +1792,73 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// For loads from a constant pool to a vector register, print the constant
// loaded.
CASE_ALL_MOV_RM()
case X86::VBROADCASTF128:
case X86::VBROADCASTI128:
case X86::VBROADCASTF32X4Z256rm:
case X86::VBROADCASTF32X4rm:
case X86::VBROADCASTF32X8rm:
case X86::VBROADCASTF64X2Z128rm:
case X86::VBROADCASTF64X2rm:
case X86::VBROADCASTF64X4rm:
case X86::VBROADCASTI32X4Z256rm:
case X86::VBROADCASTI32X4rm:
case X86::VBROADCASTI32X8rm:
case X86::VBROADCASTI64X2Z128rm:
case X86::VBROADCASTI64X2rm:
case X86::VBROADCASTI64X4rm:
if (!OutStreamer->isVerboseAsm())
break;
if (MI->getNumOperands() <= 4)
break;
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
int NumLanes = 1;
// Override NumLanes for the broadcast instructions.
switch (MI->getOpcode()) {
case X86::VBROADCASTF128: NumLanes = 2; break;
case X86::VBROADCASTI128: NumLanes = 2; break;
case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
}
std::string Comment;
raw_string_ostream CS(Comment);
const MachineOperand &DstOp = MI->getOperand(0);
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
CS << "[";
for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
if (i != 0)
CS << ",";
if (CDS->getElementType()->isIntegerTy())
CS << CDS->getElementAsInteger(i);
else if (CDS->getElementType()->isFloatTy())
CS << CDS->getElementAsFloat(i);
else if (CDS->getElementType()->isDoubleTy())
CS << CDS->getElementAsDouble(i);
else
CS << "?";
for (int l = 0; l != NumLanes; ++l) {
for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
if (i != 0 || l != 0)
CS << ",";
if (CDS->getElementType()->isIntegerTy())
CS << CDS->getElementAsInteger(i);
else if (CDS->getElementType()->isFloatTy())
CS << CDS->getElementAsFloat(i);
else if (CDS->getElementType()->isDoubleTy())
CS << CDS->getElementAsDouble(i);
else
CS << "?";
}
}
CS << "]";
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
if (i != 0)
CS << ",";
Constant *COp = CV->getOperand(i);
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
if (CI->getBitWidth() <= 64) {
CS << CI->getZExtValue();
} else {
// print multi-word constant as (w0,w1)
const auto &Val = CI->getValue();
CS << "(";
for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
if (i > 0)
CS << ",";
CS << Val.getRawData()[i];
}
CS << ")";
}
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
SmallString<32> Str;
CF->getValueAPF().toString(Str);
CS << Str;
} else {
CS << "?";
for (int l = 0; l != NumLanes; ++l) {
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
if (i != 0 || l != 0)
CS << ",";
printConstant(CV->getOperand(i), CS);
}
}
CS << ">";
@ -1826,6 +1866,85 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
break;
case X86::VBROADCASTSSrm:
case X86::VBROADCASTSSYrm:
case X86::VBROADCASTSSZ128m:
case X86::VBROADCASTSSZ256m:
case X86::VBROADCASTSSZm:
case X86::VBROADCASTSDYrm:
case X86::VBROADCASTSDZ256m:
case X86::VBROADCASTSDZm:
case X86::VPBROADCASTBrm:
case X86::VPBROADCASTBYrm:
case X86::VPBROADCASTBZ128m:
case X86::VPBROADCASTBZ256m:
case X86::VPBROADCASTBZm:
case X86::VPBROADCASTDrm:
case X86::VPBROADCASTDYrm:
case X86::VPBROADCASTDZ128m:
case X86::VPBROADCASTDZ256m:
case X86::VPBROADCASTDZm:
case X86::VPBROADCASTQrm:
case X86::VPBROADCASTQYrm:
case X86::VPBROADCASTQZ128m:
case X86::VPBROADCASTQZ256m:
case X86::VPBROADCASTQZm:
case X86::VPBROADCASTWrm:
case X86::VPBROADCASTWYrm:
case X86::VPBROADCASTWZ128m:
case X86::VPBROADCASTWZ256m:
case X86::VPBROADCASTWZm:
if (!OutStreamer->isVerboseAsm())
break;
if (MI->getNumOperands() <= 4)
break;
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
int NumElts;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case X86::VBROADCASTSSrm: NumElts = 4; break;
case X86::VBROADCASTSSYrm: NumElts = 8; break;
case X86::VBROADCASTSSZ128m: NumElts = 4; break;
case X86::VBROADCASTSSZ256m: NumElts = 8; break;
case X86::VBROADCASTSSZm: NumElts = 16; break;
case X86::VBROADCASTSDYrm: NumElts = 4; break;
case X86::VBROADCASTSDZ256m: NumElts = 4; break;
case X86::VBROADCASTSDZm: NumElts = 8; break;
case X86::VPBROADCASTBrm: NumElts = 16; break;
case X86::VPBROADCASTBYrm: NumElts = 32; break;
case X86::VPBROADCASTBZ128m: NumElts = 16; break;
case X86::VPBROADCASTBZ256m: NumElts = 32; break;
case X86::VPBROADCASTBZm: NumElts = 64; break;
case X86::VPBROADCASTDrm: NumElts = 4; break;
case X86::VPBROADCASTDYrm: NumElts = 8; break;
case X86::VPBROADCASTDZ128m: NumElts = 4; break;
case X86::VPBROADCASTDZ256m: NumElts = 8; break;
case X86::VPBROADCASTDZm: NumElts = 16; break;
case X86::VPBROADCASTQrm: NumElts = 2; break;
case X86::VPBROADCASTQYrm: NumElts = 4; break;
case X86::VPBROADCASTQZ128m: NumElts = 2; break;
case X86::VPBROADCASTQZ256m: NumElts = 4; break;
case X86::VPBROADCASTQZm: NumElts = 8; break;
case X86::VPBROADCASTWrm: NumElts = 8; break;
case X86::VPBROADCASTWYrm: NumElts = 16; break;
case X86::VPBROADCASTWZ128m: NumElts = 8; break;
case X86::VPBROADCASTWZ256m: NumElts = 16; break;
case X86::VPBROADCASTWZm: NumElts = 32; break;
}
std::string Comment;
raw_string_ostream CS(Comment);
const MachineOperand &DstOp = MI->getOperand(0);
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
CS << "[";
for (int i = 0; i != NumElts; ++i) {
if (i != 0)
CS << ",";
printConstant(C, CS);
}
CS << "]";
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
}
}
MCInst TmpInst;

View File

@ -2624,7 +2624,8 @@ define void @avg_v64i8_const(<64 x i8>* %a) {
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = mem[0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = [1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8]
; AVX512F-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vpaddd %zmm4, %zmm3, %zmm3
; AVX512F-NEXT: vpaddd %zmm4, %zmm2, %zmm2
; AVX512F-NEXT: vpaddd %zmm4, %zmm1, %zmm1
@ -2941,7 +2942,8 @@ define void @avg_v32i16_const(<32 x i16>* %a) {
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = [1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8]
; AVX512F-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0

View File

@ -633,13 +633,13 @@ entry:
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V111:
; X32-AVX2: ## BB#0: ## %entry
; X32-AVX2-NEXT: vpbroadcastd LCPI29_0, %ymm1
; X32-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X32-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: V111:
; X64-AVX2: ## BB#0: ## %entry
; X64-AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
@ -660,13 +660,13 @@ entry:
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-AVX2-LABEL: V113:
; X32-AVX2: ## BB#0: ## %entry
; X32-AVX2-NEXT: vbroadcastss LCPI30_0, %ymm1
; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: V113:
; X64-AVX2: ## BB#0: ## %entry
; X64-AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
@ -687,12 +687,12 @@ entry:
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32: ## BB#0:
; X32-NEXT: vbroadcastss LCPI31_0, %xmm0
; X32-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X32-NEXT: retl
;
; X64-LABEL: _e2:
; X64: ## BB#0:
; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
; X64-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
; X64-NEXT: retq
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1

File diff suppressed because it is too large Load Diff

View File

@ -290,19 +290,19 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
; AVX512-LABEL: v4f32_no_estimate:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
@ -361,7 +361,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: retq # sched: [1:1.00]
@ -370,7 +370,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
@ -379,7 +379,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: retq # sched: [1:1.00]
@ -468,7 +468,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@ -480,7 +480,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
@ -493,7 +493,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; KNL-LABEL: v4f32_two_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@ -504,7 +504,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; SKX-LABEL: v4f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@ -552,19 +552,19 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
; AVX512-LABEL: v8f32_no_estimate:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
@ -630,7 +630,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: retq # sched: [1:1.00]
@ -639,7 +639,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
@ -648,7 +648,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: retq # sched: [1:1.00]
@ -750,7 +750,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@ -762,7 +762,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
@ -775,7 +775,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; KNL-LABEL: v8f32_two_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@ -786,7 +786,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SKX-LABEL: v8f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3

View File

@ -415,7 +415,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
@ -425,7 +425,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
@ -435,7 +435,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
@ -514,7 +514,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
@ -525,7 +525,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
@ -536,7 +536,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
@ -635,7 +635,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@ -648,7 +648,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -662,7 +662,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; KNL-LABEL: v4f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@ -674,7 +674,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; SKX-LABEL: v4f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@ -751,7 +751,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
@ -761,7 +761,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
@ -771,7 +771,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
@ -859,7 +859,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
@ -870,7 +870,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
@ -881,7 +881,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
@ -994,7 +994,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@ -1007,7 +1007,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@ -1021,7 +1021,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; KNL-LABEL: v8f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@ -1033,7 +1033,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SKX-LABEL: v8f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3

View File

@ -153,14 +153,16 @@ define <32 x i16> @test7(<32 x i16> %a) {
;
; AVX2-LABEL: test7:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test7:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
; AVX512-NEXT: # ymm2 = mem[0,1,0,1]
; AVX512-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX512-NEXT: retq
@ -183,7 +185,8 @@ define <16 x i32> @test8(<16 x i32> %a) {
;
; AVX2-LABEL: test8:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,1,2,3,1,1,2,3]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllvd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq