diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index dc50d860a87d..e88c3fdbb505 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -132,12 +132,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); - setOperationAction(ISD::FMA , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); - setOperationAction(ISD::FMA , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); @@ -378,6 +378,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -5876,6 +5877,26 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than +/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to +/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd +/// is expanded to mul + add. +bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + case MVT::v4f32: + return true; + default: + break; + } + + return false; +} + Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref) return TargetLowering::getSchedulingPreference(N); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 973800b46121..b0a013b4b4cf 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -366,6 +366,12 @@ namespace llvm { bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT VT) const; + private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 6c0f3d3f06e5..b0b842328196 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB))]>, - Requires<[FPContractions]>; + [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>; def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fsub V_immneg0, - (fsub (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB)))]>, - Requires<[FPContractions]>; + [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC, + (fneg VRRC:$vB))))]>; def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 543038335e9d..29f7875a7927 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -353,7 +353,6 @@ def xaddroff : ComplexPattern; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; @@ -1312,51 +1311,43 @@ let Uses = [RM] in { def FMADD : AForm_1<63, 29, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>; def FMADDS : AForm_1<59, 29, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>; def FMSUB : AForm_1<63, 28, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>; def FMSUBS : AForm_1<59, 28, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>; def FNMADD : AForm_1<63, 31, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>; def FNMADDS : AForm_1<59, 31, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>; def FNMSUB : AForm_1<63, 30, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC, + (fneg F8RC:$FRB))))]>; def FNMSUBS : AForm_1<59, 30, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC, + (fneg F4RC:$FRB))))]>; } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code @@ -1517,14 +1508,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS GPRC:$in, tblockaddress:$g)>; -// Fused negative multiply subtract, alternate pattern -def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)), - (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>, - Requires<[FPContractions]>; -def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)), - (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>, - Requires<[FPContractions]>; - // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts.