diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index b428589c4e41..7766fea4f5d8 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -99,6 +99,46 @@ bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, return false; } +bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + /// Select instructions not customized! 
Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h index e98d590a279b..208701e346c6 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h @@ -76,6 +76,23 @@ private: virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Alias); + /// \brief Select constant vector splats. + virtual bool selectVSplat(SDNode *N, APInt &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm3. + virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm4. + virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm5. + virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm6. + virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm8. + virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a simm5. + virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a power of 2. 
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + virtual SDNode *Select(SDNode *N); virtual std::pair selectNode(SDNode *Node) = 0; diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 2a841b925cee..5f019367b295 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -221,8 +221,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VSMIN: return "MipsISD::VSMIN"; case MipsISD::VUMAX: return "MipsISD::VUMAX"; case MipsISD::VUMIN: return "MipsISD::VUMIN"; - case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; - case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 0d588c1d2a96..ae82e7e7ed66 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -172,12 +172,6 @@ namespace llvm { VUMAX, VUMIN, - // Special case of BUILD_VECTOR where all elements are the same. - VSPLAT, - // Special case of VSPLAT where the result is v2i64, the operand is - // constant, and the operand fits in a signed 10-bits value. 
- VSPLATD, - // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index 959c05aa0a9c..60dcdce0861c 100644 --- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -def SDT_MipsSplat : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>]>; def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisInt<1>, @@ -34,8 +33,6 @@ def MipsVUMax : SDNode<"MipsISD::VUMAX", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; -def MipsVSplat : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>; -def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; @@ -47,6 +44,48 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +// Operands + +def uimm3 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def uimm4 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def uimm8 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def simm5 : Operand; + +def simm10 : Operand; + +def vsplat_uimm3 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm4 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm5 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm6 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_uimm8 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def vsplat_simm5 : Operand; + +def vsplat_simm10 : Operand; + // Pattern fragments def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), (MipsVExtractSExt 
node:$vec, node:$idx, i8)>; @@ -131,31 +170,96 @@ def vsetult_v8i16 : vsetcc_type; def vsetult_v4i32 : vsetcc_type; def vsetult_v2i64 : vsetcc_type; -def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>; -def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; -def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>; -def vsplati64 : PatFrag<(ops node:$in), (v2i64 (MipsVSplatD (i32 node:$in)))>; +def vsplati8 : PatFrag<(ops node:$e0), + (v16i8 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati16 : PatFrag<(ops node:$e0), + (v8i16 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati32 : PatFrag<(ops node:$e0), + (v4i32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +def vsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector:$v0 node:$e0, node:$e0))>; + +class SplatPatLeaf + : PatLeaf { + Operand OpClass = opclass; +} + +class SplatComplexPattern roots = [], + list props = []> : + ComplexPattern { + Operand OpClass = opclass; +} + +def vsplati8_uimm3 : SplatComplexPattern; + +def vsplati8_uimm5 : SplatComplexPattern; + +def vsplati8_uimm8 : SplatComplexPattern; + +def vsplati8_simm5 : SplatComplexPattern; + +def vsplati16_uimm4 : SplatComplexPattern; + +def vsplati16_uimm5 : SplatComplexPattern; + +def vsplati16_simm5 : SplatComplexPattern; + +def vsplati32_uimm5 : SplatComplexPattern; + +def vsplati32_simm5 : SplatComplexPattern; + +def vsplati64_uimm5 : SplatComplexPattern; + +def vsplati64_uimm6 : SplatComplexPattern; + +def vsplati64_simm5 : SplatComplexPattern; + +// Any build_vector that is a constant splat with a value that is an exact +// power of 2 +def vsplat_uimm_pow2 : ComplexPattern; // Immediates def immSExt5 : ImmLeaf(Imm);}]>; def immSExt10: ImmLeaf(Imm);}]>; 
-def uimm3 : Operand { - let PrintMethod = "printUnsignedImm"; -} - -def uimm4 : Operand { - let PrintMethod = "printUnsignedImm"; -} - -def uimm8 : Operand { - let PrintMethod = "printUnsignedImm"; -} - -def simm5 : Operand; - -def simm10 : Operand; - // Instruction encoding. class ADD_A_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010000>; class ADD_A_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010000>; @@ -912,47 +1016,14 @@ class MSA_BIT_D_DESC_BASE { +class MSA_BIT_SPLAT_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm3:$u3); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u3"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati8 immZExt3:$u3)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_BIT_SPLATH_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm4:$u4); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u4"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati16 immZExt4:$u4)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_BIT_SPLATW_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm5:$u5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati32 immZExt5:$u5)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_BIT_SPLATD_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm6:$u6); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u6"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (vsplati64 immZExt6:$u6)))]; + dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$u); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, SplatImm:$u))]; InstrItinClass Itinerary = itin; } @@ -967,38 +1038,24 @@ class MSA_COPY_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm5:$u5); - string 
AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (SplatNode immZExt5:$u5)))]; - InstrItinClass Itinerary = itin; -} - -class MSA_SI5_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, simm5:$s5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $s5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (SplatNode immSExt5:$s5)))]; + dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$imm); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $imm"); + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, SplatImm:$imm))]; InstrItinClass Itinerary = itin; } class MSA_I8_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm8:$u8); + dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$u8); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, - (SplatNode immZExt8:$u8)))]; + list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, SplatImm:$u8))]; InstrItinClass Itinerary = itin; } @@ -1013,13 +1070,14 @@ class MSA_I8_X_DESC_BASE { +class MSA_I10_LDI_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins simm10:$i10); + dag InOperandList = (ins vsplat_simm10:$i10); string AsmString = !strconcat(instr_asm, "\t$wd, $i10"); - list Pattern = [(set RCWD:$wd, (OpNode immSExt10:$i10))]; + // LDI is matched using custom matching code in MipsSEISelDAGToDAG.cpp + list Pattern = []; + bit hasSideEffects = 0; InstrItinClass Itinerary = itin; } @@ -1033,6 +1091,17 @@ class MSA_2R_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws"); + list Pattern = [(set RCWD:$wd, (VT (OpNode RCWS:$ws)))]; + InstrItinClass Itinerary = itin; +} + class MSA_2RF_DESC_BASE : @@ -1171,17 +1240,17 @@ class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128H>, IsCommutable; class ADDV_W_DESC : 
MSA_3R_DESC_BASE<"addv.w", add, MSA128W>, IsCommutable; class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128D>, IsCommutable; -class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8, MSA128B>; -class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16, MSA128H>; -class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", add, vsplati32, MSA128W>; -class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", add, vsplati64, MSA128D>; +class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8_uimm5, MSA128B>; +class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16_uimm5, MSA128H>; +class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", add, vsplati32_uimm5, MSA128W>; +class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", add, vsplati64_uimm5, MSA128D>; class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", and, MSA128B>; class AND_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class AND_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class AND_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8, MSA128B>; +class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8_uimm8, MSA128B>; class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b, MSA128B>; class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h, MSA128H>; @@ -1304,11 +1373,11 @@ class BSEL_V_DESC { class BSELI_B_DESC { dag OutOperandList = (outs MSA128B:$wd); - dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, uimm8:$u8); + dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, vsplat_uimm8:$u8); string AsmString = "bseli.b\t$wd, $ws, $u8"; list Pattern = [(set MSA128B:$wd, (vselect MSA128B:$wd_in, MSA128B:$ws, - (vsplati8 immZExt8:$u8)))]; + vsplati8_uimm8:$u8))]; InstrItinClass Itinerary = NoItinerary; string Constraints = "$wd = $wd_in"; } @@ -1339,14 +1408,14 @@ class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", vseteq_v4i32, MSA128W>, class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", vseteq_v2i64, MSA128D>, IsCommutable; -class CEQI_B_DESC : 
MSA_SI5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8, - MSA128B>; -class CEQI_H_DESC : MSA_SI5_DESC_BASE<"ceqi.h", vseteq_v8i16, vsplati16, - MSA128H>; -class CEQI_W_DESC : MSA_SI5_DESC_BASE<"ceqi.w", vseteq_v4i32, vsplati32, - MSA128W>; -class CEQI_D_DESC : MSA_SI5_DESC_BASE<"ceqi.d", vseteq_v2i64, vsplati64, - MSA128D>; +class CEQI_B_DESC : MSA_I5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8_simm5, + MSA128B>; +class CEQI_H_DESC : MSA_I5_DESC_BASE<"ceqi.h", vseteq_v8i16, vsplati16_simm5, + MSA128H>; +class CEQI_W_DESC : MSA_I5_DESC_BASE<"ceqi.w", vseteq_v4i32, vsplati32_simm5, + MSA128W>; +class CEQI_D_DESC : MSA_I5_DESC_BASE<"ceqi.d", vseteq_v2i64, vsplati64_simm5, + MSA128D>; class CFCMSA_DESC { dag OutOperandList = (outs GPR32:$rd); @@ -1366,23 +1435,23 @@ class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", vsetule_v8i16, MSA128H>; class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", vsetule_v4i32, MSA128W>; class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", vsetule_v2i64, MSA128D>; -class CLEI_S_B_DESC : MSA_SI5_DESC_BASE<"clei_s.b", vsetle_v16i8, vsplati8, - MSA128B>; -class CLEI_S_H_DESC : MSA_SI5_DESC_BASE<"clei_s.h", vsetle_v8i16, vsplati16, - MSA128H>; -class CLEI_S_W_DESC : MSA_SI5_DESC_BASE<"clei_s.w", vsetle_v4i32, vsplati32, - MSA128W>; -class CLEI_S_D_DESC : MSA_SI5_DESC_BASE<"clei_s.d", vsetle_v2i64, vsplati64, - MSA128D>; +class CLEI_S_B_DESC : MSA_I5_DESC_BASE<"clei_s.b", vsetle_v16i8, + vsplati8_simm5, MSA128B>; +class CLEI_S_H_DESC : MSA_I5_DESC_BASE<"clei_s.h", vsetle_v8i16, + vsplati16_simm5, MSA128H>; +class CLEI_S_W_DESC : MSA_I5_DESC_BASE<"clei_s.w", vsetle_v4i32, + vsplati32_simm5, MSA128W>; +class CLEI_S_D_DESC : MSA_I5_DESC_BASE<"clei_s.d", vsetle_v2i64, + vsplati64_simm5, MSA128D>; -class CLEI_U_B_DESC : MSA_I5_DESC_BASE<"clei_u.b", vsetule_v16i8, vsplati8, - MSA128B>; -class CLEI_U_H_DESC : MSA_I5_DESC_BASE<"clei_u.h", vsetule_v8i16, vsplati16, - MSA128H>; -class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32, vsplati32, - MSA128W>; 
-class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64, vsplati64, - MSA128D>; +class CLEI_U_B_DESC : MSA_I5_DESC_BASE<"clei_u.b", vsetule_v16i8, + vsplati8_uimm5, MSA128B>; +class CLEI_U_H_DESC : MSA_I5_DESC_BASE<"clei_u.h", vsetule_v8i16, + vsplati16_uimm5, MSA128H>; +class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32, + vsplati32_uimm5, MSA128W>; +class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64, + vsplati64_uimm5, MSA128D>; class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", vsetlt_v16i8, MSA128B>; class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", vsetlt_v8i16, MSA128H>; @@ -1394,23 +1463,23 @@ class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", vsetult_v8i16, MSA128H>; class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", vsetult_v4i32, MSA128W>; class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", vsetult_v2i64, MSA128D>; -class CLTI_S_B_DESC : MSA_SI5_DESC_BASE<"clti_s.b", vsetlt_v16i8, vsplati8, - MSA128B>; -class CLTI_S_H_DESC : MSA_SI5_DESC_BASE<"clti_s.h", vsetlt_v8i16, vsplati16, - MSA128H>; -class CLTI_S_W_DESC : MSA_SI5_DESC_BASE<"clti_s.w", vsetlt_v4i32, vsplati32, - MSA128W>; -class CLTI_S_D_DESC : MSA_SI5_DESC_BASE<"clti_s.d", vsetlt_v2i64, vsplati64, - MSA128D>; +class CLTI_S_B_DESC : MSA_I5_DESC_BASE<"clti_s.b", vsetlt_v16i8, + vsplati8_simm5, MSA128B>; +class CLTI_S_H_DESC : MSA_I5_DESC_BASE<"clti_s.h", vsetlt_v8i16, + vsplati16_simm5, MSA128H>; +class CLTI_S_W_DESC : MSA_I5_DESC_BASE<"clti_s.w", vsetlt_v4i32, + vsplati32_simm5, MSA128W>; +class CLTI_S_D_DESC : MSA_I5_DESC_BASE<"clti_s.d", vsetlt_v2i64, + vsplati64_simm5, MSA128D>; -class CLTI_U_B_DESC : MSA_I5_DESC_BASE<"clti_u.b", vsetult_v16i8, vsplati8, - MSA128B>; -class CLTI_U_H_DESC : MSA_I5_DESC_BASE<"clti_u.h", vsetult_v8i16, vsplati16, - MSA128H>; -class CLTI_U_W_DESC : MSA_I5_DESC_BASE<"clti_u.w", vsetult_v4i32, vsplati32, - MSA128W>; -class CLTI_U_D_DESC : MSA_I5_DESC_BASE<"clti_u.d", vsetult_v2i64, vsplati64, - MSA128D>; +class CLTI_U_B_DESC : 
MSA_I5_DESC_BASE<"clti_u.b", vsetult_v16i8, + vsplati8_uimm5, MSA128B>; +class CLTI_U_H_DESC : MSA_I5_DESC_BASE<"clti_u.h", vsetult_v8i16, + vsplati16_uimm5, MSA128H>; +class CLTI_U_W_DESC : MSA_I5_DESC_BASE<"clti_u.w", vsetult_v4i32, + vsplati32_uimm5, MSA128W>; +class CLTI_U_D_DESC : MSA_I5_DESC_BASE<"clti_u.d", vsetult_v2i64, + vsplati64_uimm5, MSA128D>; class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", vextract_sext_i8, v16i8, GPR32, MSA128B>; @@ -1592,9 +1661,12 @@ class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, MSA128D, MSA128W>; -class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", vsplati8, MSA128B, GPR32>; -class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", vsplati16, MSA128H, GPR32>; -class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", vsplati32, MSA128W, GPR32>; +class FILL_B_DESC : MSA_2R_FILL_DESC_BASE<"fill.b", v16i8, vsplati8, MSA128B, + GPR32>; +class FILL_H_DESC : MSA_2R_FILL_DESC_BASE<"fill.h", v8i16, vsplati16, MSA128H, + GPR32>; +class FILL_W_DESC : MSA_2R_FILL_DESC_BASE<"fill.w", v4i32, vsplati32, MSA128W, + GPR32>; class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128W>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128D>; @@ -1779,10 +1851,10 @@ class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128H>; class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128W>; class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128D>; -class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", vsplati8, MSA128B>; -class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", vsplati16, MSA128H>; -class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", vsplati32, MSA128W>; -class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", vsplati64, MSA128D>; +class LDI_B_DESC : MSA_I10_LDI_DESC_BASE<"ldi.b", MSA128B>; +class LDI_H_DESC : MSA_I10_LDI_DESC_BASE<"ldi.h", MSA128H>; +class LDI_W_DESC : MSA_I10_LDI_DESC_BASE<"ldi.w", MSA128W>; +class LDI_D_DESC : MSA_I10_LDI_DESC_BASE<"ldi.d", MSA128D>; class 
LDX_DESC_BASE; class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128W>; class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128D>; -class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8, +class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8_simm5, MSA128B>; -class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16, +class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16_simm5, MSA128H>; -class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32, +class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32_simm5, MSA128W>; -class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64, +class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64_simm5, MSA128D>; -class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8, +class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8_uimm5, MSA128B>; -class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16, +class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16_uimm5, MSA128H>; -class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32, +class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32_uimm5, MSA128W>; -class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64, +class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64_uimm5, MSA128D>; class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; @@ -1863,22 +1935,22 @@ class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128H>; class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128W>; class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128D>; -class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8, +class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8_simm5, MSA128B>; -class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, 
vsplati16, +class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, vsplati16_simm5, MSA128H>; -class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32, +class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32_simm5, MSA128W>; -class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64, +class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64_simm5, MSA128D>; -class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8, +class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8_uimm5, MSA128B>; -class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16, +class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16_uimm5, MSA128H>; -class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32, +class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32_uimm5, MSA128W>; -class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64, +class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64_uimm5, MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; @@ -1942,14 +2014,15 @@ class NOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class NOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class NOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", MipsVNOR, vsplati8, MSA128B>; +class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", MipsVNOR, vsplati8_uimm8, + MSA128B>; class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", or, MSA128B>; class OR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class OR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8, MSA128B>; +class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8_uimm8, MSA128B>; class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", int_mips_pckev_b, MSA128B>; class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", int_mips_pckev_h, MSA128H>; @@ 
-1995,10 +2068,14 @@ class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128H>; class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128W>; class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128D>; -class SLLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"slli.b", shl, MSA128B>; -class SLLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"slli.h", shl, MSA128H>; -class SLLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"slli.w", shl, MSA128W>; -class SLLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"slli.d", shl, MSA128D>; +class SLLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.b", shl, vsplati8_uimm3, + MSA128B>; +class SLLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.h", shl, vsplati16_uimm4, + MSA128H>; +class SLLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.w", shl, vsplati32_uimm5, + MSA128W>; +class SLLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.d", shl, vsplati64_uimm6, + MSA128D>; class SPLAT_B_DESC : MSA_3R_DESC_BASE<"splat.b", int_mips_splat_b, MSA128B, MSA128B, GPR32>; @@ -2023,10 +2100,14 @@ class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128H>; class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128W>; class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128D>; -class SRAI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srai.b", sra, MSA128B>; -class SRAI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srai.h", sra, MSA128H>; -class SRAI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srai.w", sra, MSA128W>; -class SRAI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srai.d", sra, MSA128D>; +class SRAI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.b", sra, vsplati8_uimm3, + MSA128B>; +class SRAI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.h", sra, vsplati16_uimm4, + MSA128H>; +class SRAI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.w", sra, vsplati32_uimm5, + MSA128W>; +class SRAI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.d", sra, vsplati64_uimm6, + MSA128D>; class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128B>; class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128H>; @@ -2043,10 +2124,14 @@ class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, 
MSA128H>; class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128W>; class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128D>; -class SRLI_B_DESC : MSA_BIT_SPLATB_DESC_BASE<"srli.b", srl, MSA128B>; -class SRLI_H_DESC : MSA_BIT_SPLATH_DESC_BASE<"srli.h", srl, MSA128H>; -class SRLI_W_DESC : MSA_BIT_SPLATW_DESC_BASE<"srli.w", srl, MSA128W>; -class SRLI_D_DESC : MSA_BIT_SPLATD_DESC_BASE<"srli.d", srl, MSA128D>; +class SRLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.b", srl, vsplati8_uimm3, + MSA128B>; +class SRLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.h", srl, vsplati16_uimm4, + MSA128H>; +class SRLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.w", srl, vsplati32_uimm5, + MSA128W>; +class SRLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.d", srl, vsplati64_uimm6, + MSA128D>; class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128B>; class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128H>; @@ -2123,10 +2208,10 @@ class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", sub, MSA128H>; class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128W>; class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128D>; -class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8, MSA128B>; -class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16, MSA128H>; -class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32, MSA128W>; -class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64, MSA128D>; +class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8_uimm5, MSA128B>; +class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16_uimm5, MSA128H>; +class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32_uimm5, MSA128W>; +class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64_uimm5, MSA128D>; class VSHF_B_DESC : MSA_3R_DESC_BASE<"vshf.b", int_mips_vshf_b, MSA128B>; class VSHF_H_DESC : MSA_3R_DESC_BASE<"vshf.h", int_mips_vshf_h, MSA128H>; @@ -2138,7 +2223,7 @@ class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class 
XOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; class XOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE; -class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", xor, vsplati8, MSA128B>; +class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", xor, vsplati8_uimm8, MSA128B>; // Instruction defs. def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC; diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 10221d5fccc8..8a9481c7dee6 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -374,6 +374,147 @@ bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base, selectAddrDefault(Addr, Base, Offset); } +// Select constant vector splats. +// +// Returns true and sets Imm if: +// * MSA is enabled +// * N is an ISD::BUILD_VECTOR representing a constant splat +// * The splat value fits in 32 bits as an unsigned value (APInt::isIntN). +// +// That last requirement isn't strictly a requirement of the instruction set +// but it simplifies the callers by allowing them to assume they don't have to +// handle 64-bit values. The callers will also be placing stricter requirements +// on the immediates so this doesn't prohibit selection of legal immediates. +bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { + if (!Subtarget.hasMSA()) + return false; + + BuildVectorSDNode *Node = dyn_cast(N); + + if (Node == NULL) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, + !Subtarget.isLittle())) + return false; + + // None of the immediate forms can handle more than 32 bits + if (!SplatValue.isIntN(32)) + return false; + + Imm = SplatValue; + + return true; +} + +// Select constant vector splats.
+// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value fits in an integer with the specified signed-ness and +// width. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian MSA since BITCAST is +// sometimes a shuffle in big-endian mode. +// +// It's worth noting that this function is not used as part of the selection +// of ldi.[bhwd] since it does not permit using the wrong-typed ldi.[bhwd] +// instruction to achieve the desired bit pattern. ldi.[bhwd] is selected in +// MipsSEDAGToDAGISel::selectNode. +bool MipsSEDAGToDAGISel:: +selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat (N.getNode(), ImmValue) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + if (( Signed && ImmValue.isSignedIntN(ImmBitSize)) || + (!Signed && ImmValue.isIntN(ImmBitSize))) { + Imm = CurDAG->getTargetConstant(ImmValue, EltTy); + return true; + } + } + + return false; +} + +// Select constant vector splats whose value fits in a uimm3. +bool MipsSEDAGToDAGISel:: +selectVSplatUimm3(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 3); +} + +// Select constant vector splats whose value fits in a uimm4. +bool MipsSEDAGToDAGISel:: +selectVSplatUimm4(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 4); +} + +// Select constant vector splats whose value fits in a uimm5. +bool MipsSEDAGToDAGISel:: +selectVSplatUimm5(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 5); +} + +// Select constant vector splats whose value fits in a uimm6. +bool MipsSEDAGToDAGISel:: +selectVSplatUimm6(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 6); +} + +// Select constant vector splats whose value fits in a uimm8.
+bool MipsSEDAGToDAGISel:: +selectVSplatUimm8(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 8); +} + +// Select constant vector splats. +bool MipsSEDAGToDAGISel:: +selectVSplatSimm5(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, true, 5); +} + +// Select constant vector splats whose value is a power of 2. +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a power of two. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian MSA since BITCAST is +// sometimes a shuffle in big-endian mode. +bool MipsSEDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat (N.getNode(), ImmValue) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + int32_t Log2 = ImmValue.exactLogBase2(); + + if (Log2 != -1) { + Imm = CurDAG->getTargetConstant(Log2, EltTy); + return true; + } + } + + return false; +} + std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); @@ -545,6 +686,82 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { MVT::Untyped, Ops); return std::make_pair(true, Res); } + + case ISD::BUILD_VECTOR: { + // Select appropriate ldi.[bhwd] instructions for constant splats of + // 128-bit when MSA is enabled. Fixup any register class mismatches that + // occur as a result. + // + // This allows the compiler to use a wider range of immediates than would + // otherwise be allowed. If, for example, v4i32 could only use ldi.h then + // it would not be possible to load { 0x01010101, 0x01010101, 0x01010101, + // 0x01010101 } without using a constant pool. 
This would be sub-optimal + // when // 'ldi.b wd, 1' is capable of producing that bit-pattern in the + // same set/ of registers. Similarly, ldi.h isn't capable of producing { + // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can. + + BuildVectorSDNode *BVN = cast(Node); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned LdiOp; + EVT ResVecTy = BVN->getValueType(0); + EVT ViaVecTy; + + if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector()) + return std::make_pair(false, (SDNode*)NULL); + + if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, + !Subtarget.isLittle())) + return std::make_pair(false, (SDNode*)NULL); + + switch (SplatBitSize) { + default: + return std::make_pair(false, (SDNode*)NULL); + case 8: + LdiOp = Mips::LDI_B; + ViaVecTy = MVT::v16i8; + break; + case 16: + LdiOp = Mips::LDI_H; + ViaVecTy = MVT::v8i16; + break; + case 32: + LdiOp = Mips::LDI_W; + ViaVecTy = MVT::v4i32; + break; + case 64: + LdiOp = Mips::LDI_D; + ViaVecTy = MVT::v2i64; + break; + } + + if (!SplatValue.isSignedIntN(10)) + return std::make_pair(false, (SDNode*)NULL); + + SDValue Imm = CurDAG->getTargetConstant(SplatValue, + ViaVecTy.getVectorElementType()); + + SDNode *Res = CurDAG->getMachineNode(LdiOp, SDLoc(Node), ViaVecTy, Imm); + + if (ResVecTy != ViaVecTy) { + // If LdiOp is writing to a different register class to ResVecTy, then + // fix it up here. This COPY_TO_REGCLASS should never cause a move.v + // since the source and destination register sets contain the same + // registers. 
+ const TargetLowering *TLI = getTargetLowering(); + MVT ResVecTySimple = ResVecTy.getSimpleVT(); + const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); + Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, SDLoc(Node), + ResVecTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), + MVT::i32)); + } + + return std::make_pair(true, Res); + } + } return std::make_pair(false, (SDNode*)NULL); diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index 22e597ebf88a..fe0da12aa321 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -58,6 +58,26 @@ private: virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base, SDValue &Offset) const; + /// \brief Select constant vector splats. + virtual bool selectVSplat(SDNode *N, APInt &Imm) const; + /// \brief Select constant vector splats whose value fits in a given integer. + virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// \brief Select constant vector splats whose value fits in a uimm3. + virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm4. + virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm5. + virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm6. + virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a uimm8. + virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value fits in a simm5. + virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a power of 2. 
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + virtual std::pair selectNode(SDNode *Node); virtual void processFunctionAfterISel(MachineFunction &MF); diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 1575cdef2d67..f135b5f3857d 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -738,22 +738,11 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue NotOp; - ConstantSDNode *Const; if (ISD::isBuildVectorAllOnes(Op0.getNode())) NotOp = Op1; else if (ISD::isBuildVectorAllOnes(Op1.getNode())) NotOp = Op0; - else if ((Op0->getOpcode() == MipsISD::VSPLAT || - Op0->getOpcode() == MipsISD::VSPLATD) && - (Const = dyn_cast(Op0->getOperand(0))) && - Const->isAllOnesValue()) - NotOp = Op1; - else if ((Op1->getOpcode() == MipsISD::VSPLAT || - Op1->getOpcode() == MipsISD::VSPLATD) && - (Const = dyn_cast(Op1->getOperand(0))) && - Const->isAllOnesValue()) - NotOp = Op0; else return SDValue(); @@ -1084,14 +1073,38 @@ static SDValue lowerMSAInsertIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } -static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { +static SDValue lowerMSASplatImm(SDValue Op, SDValue ImmOp, SelectionDAG &DAG) { EVT ResTy = Op->getValueType(0); + EVT ViaVecTy = ResTy; + SmallVector Ops; + SDValue ImmHiOp; + SDLoc DL(Op); - unsigned SplatOp = MipsISD::VSPLAT; - if (ResTy == MVT::v2i64) - SplatOp = MipsISD::VSPLATD; + if (ViaVecTy == MVT::v2i64) { + ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp, + DAG.getConstant(31, MVT::i32)); + for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) { + Ops.push_back(ImmHiOp); + Ops.push_back(ImmOp); + } + ViaVecTy = MVT::v4i32; + } else { + for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) + Ops.push_back(ImmOp); + } - return 
DAG.getNode(SplatOp, SDLoc(Op), ResTy, Op->getOperand(ImmOp)); + SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0], + Ops.size()); + + if (ResTy != ViaVecTy) + Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + + return Result; +} + +static SDValue +lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { + return lowerMSASplatImm(Op, Op->getOperand(ImmOp), DAG); } static SDValue lowerMSAUnaryIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { @@ -1306,8 +1319,16 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABinaryIntr(Op, DAG, ISD::FDIV); case Intrinsic::mips_fill_b: case Intrinsic::mips_fill_h: - case Intrinsic::mips_fill_w: - return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT); + case Intrinsic::mips_fill_w: { + SmallVector Ops; + EVT ResTy = Op->getValueType(0); + + for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) + Ops.push_back(Op->getOperand(1)); + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), ResTy, &Ops[0], + Ops.size()); + } case Intrinsic::mips_flog2_w: case Intrinsic::mips_flog2_d: return lowerMSAUnaryIntr(Op, DAG, ISD::FLOG2); @@ -1331,7 +1352,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_ldi_h: case Intrinsic::mips_ldi_w: case Intrinsic::mips_ldi_d: - return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT); + return lowerMSASplatImm(Op, 1, DAG); case Intrinsic::mips_max_s_b: case Intrinsic::mips_max_s_h: case Intrinsic::mips_max_s_w: @@ -1597,18 +1618,36 @@ lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { DAG.getValueType(EltTy)); } +static bool isConstantOrUndef(const SDValue Op) { + if (Op->getOpcode() == ISD::UNDEF) + return true; + if (dyn_cast(Op)) + return true; + if (dyn_cast(Op)) + return true; + return false; +} + +static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { + for (unsigned i = 0; i < Op->getNumOperands(); ++i) + if (isConstantOrUndef(Op->getOperand(i))) + return true; + 
return false; +} + // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the // backend. // // Lowers according to the following rules: -// - Vectors of 128-bits may be legal subject to the other rules. Other sizes -// are not legal. -// - Non-constant splats are legal and are lowered to MipsISD::VSPLAT. -// - Constant splats with an element size of 32-bits or less are legal and are -// lowered to MipsISD::VSPLAT. -// - Constant splats with an element size of 64-bits but whose value would fit -// within a 10 bit immediate are legal and are lowered to MipsISD::VSPLATD. -// - All other ISD::BUILD_VECTORS are not legal +// - Constant splats are legal as-is as long as the SplatBitSize is a power of +// 2 less than or equal to 64 and the value fits into a signed 10-bit +// immediate +// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize +// is a power of 2 less than or equal to 64 and the value does not fit into a +// signed 10-bit immediate +// - Non-constant splats are legal as-is. +// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. +// - All others are illegal and must be expanded. 
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { BuildVectorSDNode *Node = cast(Op); @@ -1623,52 +1662,51 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget->isLittle())) { - SDValue Result; - EVT TmpVecTy; - EVT ConstTy = MVT::i32; - unsigned SplatOp = MipsISD::VSPLAT; + !Subtarget->isLittle()) && SplatBitSize <= 64) { + // We can only cope with 8, 16, 32, or 64-bit elements + if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && + SplatBitSize != 64) + return SDValue(); + + // If the value fits into a simm10 then we can use ldi.[bhwd] + if (SplatValue.isSignedIntN(10)) + return Op; + + EVT ViaVecTy; switch (SplatBitSize) { default: return SDValue(); - case 64: - TmpVecTy = MVT::v2i64; - - // i64 is an illegal type on Mips32, but if it the constant fits into a - // signed 10-bit value then we can still handle it using VSPLATD and an - // i32 constant - if (HasMips64) - ConstTy = MVT::i64; - else if (isInt<10>(SplatValue.getSExtValue())) { - SplatValue = SplatValue.trunc(32); - SplatOp = MipsISD::VSPLATD; - } else - return SDValue(); - break; - case 32: - TmpVecTy = MVT::v4i32; + case 8: + ViaVecTy = MVT::v16i8; break; case 16: - TmpVecTy = MVT::v8i16; - SplatValue = SplatValue.sext(32); + ViaVecTy = MVT::v8i16; break; - case 8: - TmpVecTy = MVT::v16i8; - SplatValue = SplatValue.sext(32); + case 32: + ViaVecTy = MVT::v4i32; break; + case 64: + // There's no fill.d to fall back on for 64-bit values + return SDValue(); } - Result = DAG.getNode(SplatOp, DL, TmpVecTy, - DAG.getConstant(SplatValue, ConstTy)); - if (ResTy != Result.getValueType()) - Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + SmallVector Ops; + SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32); + + for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) + Ops.push_back(Constant); + + SDValue Result = 
DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy, + &Ops[0], Ops.size()); + + if (ViaVecTy != ResTy) + Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); return Result; - } - else if (isSplatVector(Node)) - return DAG.getNode(MipsISD::VSPLAT, DL, ResTy, Op->getOperand(0)); - else { + } else if (isSplatVector(Node)) + return Op; + else if (!isConstantOrUndefBUILD_VECTOR(Node)) { // Use INSERT_VECTOR_ELT operations rather than expand to stores. // The resulting code is the same length as the expansion, but it doesn't // use memory operations diff --git a/llvm/test/CodeGen/Mips/msa/compare_float.ll b/llvm/test/CodeGen/Mips/msa/compare_float.ll index 6bbcea05d583..287578a36539 100644 --- a/llvm/test/CodeGen/Mips/msa/compare_float.ll +++ b/llvm/test/CodeGen/Mips/msa/compare_float.ll @@ -17,7 +17,7 @@ define void @false_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwi ; (setcc $a, $b, SETFALSE) is always folded, so we won't get fcaf: ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0 - ; CHECK-DAG: st.b [[R1]], 0($4) + ; CHECK-DAG: st.w [[R1]], 0($4) ; CHECK: .size false_v4f32 } @@ -494,7 +494,7 @@ define void @true_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwin ; (setcc $a, $b, SETTRUE) is always folded, so we won't get fcaf: ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1 - ; CHECK-DAG: st.b [[R1]], 0($4) + ; CHECK-DAG: st.w [[R1]], 0($4) ; CHECK: .size true_v4f32 }