From 73347ec081550eca06fef6b9cbd22483e92945e7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 12 Jul 2018 03:42:41 +0000 Subject: [PATCH] [X86] Remove patterns and ISD nodes for the old scalar FMA intrinsic lowering. We now use llvm.fma.f32/f64 or llvm.x86.fmadd.f32/f64 intrinsics that use scalar types rather than vector types. So we don't need these special ISD nodes that operate on the lowest element of a vector. llvm-svn: 336883 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 85 +------------------- llvm/lib/Target/X86/X86ISelLowering.h | 13 --- llvm/lib/Target/X86/X86InstrAVX512.td | 52 +++--------- llvm/lib/Target/X86/X86InstrFMA.td | 11 ++- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 23 ------ 5 files changed, 19 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 50c616d382e6..e21e2bb68945 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26055,22 +26055,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; - case X86ISD::FMADDS1: return "X86ISD::FMADDS1"; - case X86ISD::FNMADDS1: return "X86ISD::FNMADDS1"; - case X86ISD::FMSUBS1: return "X86ISD::FMSUBS1"; - case X86ISD::FNMSUBS1: return "X86ISD::FNMSUBS1"; - case X86ISD::FMADDS1_RND: return "X86ISD::FMADDS1_RND"; - case X86ISD::FNMADDS1_RND: return "X86ISD::FNMADDS1_RND"; - case X86ISD::FMSUBS1_RND: return "X86ISD::FMSUBS1_RND"; - case X86ISD::FNMSUBS1_RND: return "X86ISD::FNMSUBS1_RND"; - case X86ISD::FMADDS3: return "X86ISD::FMADDS3"; - case X86ISD::FNMADDS3: return "X86ISD::FNMADDS3"; - case X86ISD::FMSUBS3: return "X86ISD::FMSUBS3"; - case X86ISD::FNMSUBS3: return "X86ISD::FNMSUBS3"; - case X86ISD::FMADDS3_RND: return "X86ISD::FMADDS3_RND"; - case X86ISD::FNMADDS3_RND: return 
"X86ISD::FNMADDS3_RND"; - case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND"; - case X86ISD::FNMSUBS3_RND: return "X86ISD::FNMSUBS3_RND"; case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H"; case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L"; case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE"; @@ -37707,28 +37691,12 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { default: llvm_unreachable("Unexpected opcode"); case ISD::FMA: Opcode = X86ISD::FNMADD; break; case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; - case X86ISD::FMADDS1: Opcode = X86ISD::FNMADDS1; break; - case X86ISD::FMADDS3: Opcode = X86ISD::FNMADDS3; break; - case X86ISD::FMADDS1_RND: Opcode = X86ISD::FNMADDS1_RND; break; - case X86ISD::FMADDS3_RND: Opcode = X86ISD::FNMADDS3_RND; break; case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FMSUBS1: Opcode = X86ISD::FNMSUBS1; break; - case X86ISD::FMSUBS3: Opcode = X86ISD::FNMSUBS3; break; - case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break; - case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break; case X86ISD::FNMADD: Opcode = ISD::FMA; break; case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMADDS1: Opcode = X86ISD::FMADDS1; break; - case X86ISD::FNMADDS3: Opcode = X86ISD::FMADDS3; break; - case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FMADDS1_RND; break; - case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FMADDS3_RND; break; case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; - case X86ISD::FNMSUBS1: Opcode = X86ISD::FMSUBS1; break; - case X86ISD::FNMSUBS3: Opcode = X86ISD::FMSUBS3; break; - case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FMSUBS1_RND; break; - case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FMSUBS3_RND; break; } } @@ -37737,28 +37705,12 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { default: 
llvm_unreachable("Unexpected opcode"); case ISD::FMA: Opcode = X86ISD::FMSUB; break; case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; - case X86ISD::FMADDS1: Opcode = X86ISD::FMSUBS1; break; - case X86ISD::FMADDS3: Opcode = X86ISD::FMSUBS3; break; - case X86ISD::FMADDS1_RND: Opcode = X86ISD::FMSUBS1_RND; break; - case X86ISD::FMADDS3_RND: Opcode = X86ISD::FMSUBS3_RND; break; case X86ISD::FMSUB: Opcode = ISD::FMA; break; case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FMSUBS1: Opcode = X86ISD::FMADDS1; break; - case X86ISD::FMSUBS3: Opcode = X86ISD::FMADDS3; break; - case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FMADDS1_RND; break; - case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FMADDS3_RND; break; case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMADDS1: Opcode = X86ISD::FNMSUBS1; break; - case X86ISD::FNMADDS3: Opcode = X86ISD::FNMSUBS3; break; - case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break; - case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break; case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; - case X86ISD::FNMSUBS1: Opcode = X86ISD::FNMADDS1; break; - case X86ISD::FNMSUBS3: Opcode = X86ISD::FNMADDS3; break; - case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FNMADDS1_RND; break; - case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FNMADDS3_RND; break; } } @@ -37803,28 +37755,11 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, return false; }; - bool IsScalarS1 = N->getOpcode() == X86ISD::FMADDS1 || - N->getOpcode() == X86ISD::FMSUBS1 || - N->getOpcode() == X86ISD::FNMADDS1 || - N->getOpcode() == X86ISD::FNMSUBS1 || - N->getOpcode() == X86ISD::FMADDS1_RND || - N->getOpcode() == X86ISD::FMSUBS1_RND || - N->getOpcode() == X86ISD::FNMADDS1_RND || - N->getOpcode() == X86ISD::FNMSUBS1_RND; - bool IsScalarS3 = N->getOpcode() == X86ISD::FMADDS3 || - N->getOpcode() == 
X86ISD::FMSUBS3 || - N->getOpcode() == X86ISD::FNMADDS3 || - N->getOpcode() == X86ISD::FNMSUBS3 || - N->getOpcode() == X86ISD::FMADDS3_RND || - N->getOpcode() == X86ISD::FMSUBS3_RND || - N->getOpcode() == X86ISD::FNMADDS3_RND || - N->getOpcode() == X86ISD::FNMSUBS3_RND; - // Do not convert the passthru input of scalar intrinsics. // FIXME: We could allow negations of the lower element only. - bool NegA = !IsScalarS1 && invertIfNegative(A); + bool NegA = invertIfNegative(A); bool NegB = invertIfNegative(B); - bool NegC = !IsScalarS3 && invertIfNegative(C); + bool NegC = invertIfNegative(C); if (!NegA && !NegB && !NegC) return SDValue(); @@ -39450,28 +39385,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VZEXT_MOVL: case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget); case X86ISD::FMADD_RND: - case X86ISD::FMADDS1_RND: - case X86ISD::FMADDS3_RND: - case X86ISD::FMADDS1: - case X86ISD::FMADDS3: case X86ISD::FMSUB: case X86ISD::FMSUB_RND: - case X86ISD::FMSUBS1_RND: - case X86ISD::FMSUBS3_RND: - case X86ISD::FMSUBS1: - case X86ISD::FMSUBS3: case X86ISD::FNMADD: case X86ISD::FNMADD_RND: - case X86ISD::FNMADDS1_RND: - case X86ISD::FNMADDS3_RND: - case X86ISD::FNMADDS1: - case X86ISD::FNMADDS3: case X86ISD::FNMSUB: case X86ISD::FNMSUB_RND: - case X86ISD::FNMSUBS1_RND: - case X86ISD::FNMSUBS3_RND: - case X86ISD::FNMSUBS1: - case X86ISD::FNMSUBS3: case ISD::FMA: return combineFMA(N, DAG, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 64ddcf7e4fe6..37002939eb98 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -499,19 +499,6 @@ namespace llvm { FMADDSUB_RND, FMSUBADD_RND, - // Scalar intrinsic FMA. - FMADDS1, FMADDS3, - FNMADDS1, FNMADDS3, - FMSUBS1, FMSUBS3, - FNMSUBS1, FNMSUBS3, - - // Scalar intrinsic FMA with rounding mode. 
- // Two versions, passthru bits on op1 or op3. - FMADDS1_RND, FMADDS3_RND, - FNMADDS1_RND, FNMADDS3_RND, - FMSUBS1_RND, FMSUBS3_RND, - FNMSUBS1_RND, FNMSUBS3_RND, - // Compress and expand. COMPRESS, EXPAND, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 615de67aadb1..20a7dbd5c38b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6702,22 +6702,22 @@ defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubR // Scalar FMA multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, - dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb, dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> { let Constraints = "$src1 = $dst", hasSideEffects = 0 in { defm r_Int: AVX512_maskable_3src_scalar, + "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>; + let mayLoad = 1 in defm m_Int: AVX512_maskable_3src_scalar, + "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>, AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>; defm rb_Int: AVX512_maskable_3src_scalar, + OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; let isCodeGenOnly = 1, isCommutable = 1 in { @@ -6744,18 +6744,11 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in { multiclass avx512_fma3s_all opc213, bits<8> opc231, bits<8> opc132, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, - SDNode OpNodes1, SDNode OpNodeRnds1, SDNode OpNodes3, - SDNode OpNodeRnds3, X86VectorVTInfo _, - string SUFF> { + X86VectorVTInfo _, string SUFF> { let ExeDomain = _.ExeDomain in { defm NAME#213#SUFF#Z: avx512_fma3s_common opc213, bits<8> opc231, bits<8> opc132, _.FRC:$src3, (i32 imm:$rc)))), 0>; defm NAME#231#SUFF#Z: avx512_fma3s_common opc213, bits<8> opc231, bits<8> opc132, // One pattern is 312 order so that the load is in a different place from the // 213 and 231 
patterns this helps tablegen's duplicate pattern detection. defm NAME#132#SUFF#Z: avx512_fma3s_common opc213, bits<8> opc231, bits<8> opc132, } multiclass avx512_fma3s opc213, bits<8> opc231, bits<8> opc132, - string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, - SDNode OpNodes1, SDNode OpNodeRnds1, SDNode OpNodes3, - SDNode OpNodeRnds3> { + string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> { let Predicates = [HasAVX512] in { defm NAME : avx512_fma3s_all, + OpNodeRnd, f32x_info, "SS">, EVEX_CD8<32, CD8VT1>, VEX_LIG; defm NAME : avx512_fma3s_all, + OpNodeRnd, f64x_info, "SD">, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W; } } -defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd, - X86Fmadds1, X86FmaddRnds1, X86Fmadds3, - X86FmaddRnds3>; -defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd, - X86Fmsubs1, X86FmsubRnds1, X86Fmsubs3, - X86FmsubRnds3>; -defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd, - X86Fnmadds1, X86FnmaddRnds1, X86Fnmadds3, - X86FnmaddRnds3>; -defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd, - X86Fnmsubs1, X86FnmsubRnds1, X86Fnmsubs3, - X86FnmsubRnds3>; +defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>; +defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>; +defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>; +defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>; multiclass avx512_scalar_fma_patterns opc132, bits<8> opc213, bits<8> opc231, } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, - string OpStr, SDNode OpNodeIntrin, SDNode OpNode, - X86FoldableSchedWrite sched> { + string OpStr, SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = SSEPackedSingle in defm NAME : fma3s_forms, @@ -319,14 +318,14 @@ multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, VR128, sdmem, sched>, VEX_W; } -defm VFMADD : 
fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd, +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd, SchedWriteFMA.Scl>, VEX_LIG; -defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub, +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub, SchedWriteFMA.Scl>, VEX_LIG; -defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd, +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadd, SchedWriteFMA.Scl>, VEX_LIG; -defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub, +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsub, SchedWriteFMA.Scl>, VEX_LIG; multiclass scalar_fma_patterns; def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>; -// Scalar FMA intrinsics with passthru bits in operand 1. -def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>; -def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>; -def X86Fmsubs1 : SDNode<"X86ISD::FMSUBS1", SDTFPTernaryOp>; -def X86Fnmsubs1 : SDNode<"X86ISD::FNMSUBS1", SDTFPTernaryOp>; - -// Scalar FMA intrinsics with passthru bits in operand 1. -def X86FmaddRnds1 : SDNode<"X86ISD::FMADDS1_RND", SDTFmaRound>; -def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>; -def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>; -def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>; - -def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>; - -// Scalar FMA intrinsics with passthru bits in operand 3. 
-def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound, [SDNPCommutative]>; -def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound, [SDNPCommutative]>; -def X86FmsubRnds3 : SDNode<"X86ISD::FMSUBS3_RND", SDTFmaRound, [SDNPCommutative]>; -def X86FnmsubRnds3 : SDNode<"X86ISD::FNMSUBS3_RND", SDTFmaRound, [SDNPCommutative]>; - def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;