diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td index f5eb32c0a601..360d348af3a6 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -44,7 +44,7 @@ def RetCC_SystemZ : CallingConv<[ // Similarly for vectors, with V24 being the ABI-compliant choice. CCIfSubtarget<"hasVector()", - CCIfType<[v16i8, v8i16, v4i32, v2i64], + CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64], CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>> // ABI-compliant code returns long double by reference, but that conversion @@ -76,13 +76,13 @@ def CC_SystemZ : CallingConv<[ // The first 8 named vector arguments are passed in V24-V31. CCIfSubtarget<"hasVector()", - CCIfType<[v16i8, v8i16, v4i32, v2i64], + CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64], CCIfFixed>>>, // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots. CCIfSubtarget<"hasVector()", - CCIfType<[v16i8, v8i16, v4i32, v2i64], + CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64], CCAssignToStack<16, 8>>>, // Other arguments are passed in 8-byte-aligned 8-byte stack slots. 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index ddcb792ee097..5f547439c9aa 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -101,6 +101,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); } // Compute derived properties from the register classes @@ -327,6 +328,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, } } + if (Subtarget.hasVector()) { + // There should be no need to check for float types other than v2f64 + // since <2 x f32> isn't a legal type. + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + } + // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -352,6 +362,33 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, } } + // Handle floating-point vector types. + if (Subtarget.hasVector()) { + // Scalar-to-vector conversion is just a subreg. + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + + // Some insertions and extractions can be done directly but others + // need to go via integers. + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + + // These operations have direct equivalents. 
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Legal); + setOperationAction(ISD::FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMA, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FABS, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + } + // We have fused multiply-addition for f32 and f64 but not f128. setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMA, MVT::f64, Legal); @@ -818,6 +855,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: + case MVT::v2f64: RC = &SystemZ::VR128BitRegClass; break; } @@ -1894,18 +1932,25 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, return Result; } -// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot -// be done directly. -static unsigned getVectorComparison(ISD::CondCode CC) { +// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot +// be done directly. IsFP is true if CC is for a floating-point rather than +// integer comparison. +static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { switch (CC) { + case ISD::SETOEQ: case ISD::SETEQ: - return SystemZISD::VICMPE; + return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE; + case ISD::SETOGE: + case ISD::SETGE: + return IsFP ? SystemZISD::VFCMPHE : 0; + + case ISD::SETOGT: case ISD::SETGT: - return SystemZISD::VICMPH; + return IsFP ?
SystemZISD::VFCMPH : SystemZISD::VICMPH; case ISD::SETUGT: - return SystemZISD::VICMPHL; + return IsFP ? 0 : SystemZISD::VICMPHL; default: return 0; @@ -1914,15 +1959,17 @@ static unsigned getVectorComparison(ISD::CondCode CC) { // Return the SystemZISD vector comparison operation for CC or its inverse, // or 0 if neither can be done directly. Indicate in Invert whether the -// result is for the inverse of CC. -static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool &Invert) { - if (unsigned Opcode = getVectorComparison(CC)) { +// result is for the inverse of CC. IsFP is true if CC is for a +// floating-point rather than integer comparison. +static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, + bool &Invert) { + if (unsigned Opcode = getVectorComparison(CC, IsFP)) { Invert = false; return Opcode; } - CC = ISD::getSetCCInverse(CC, true); - if (unsigned Opcode = getVectorComparison(CC)) { + CC = ISD::getSetCCInverse(CC, !IsFP); + if (unsigned Opcode = getVectorComparison(CC, IsFP)) { Invert = true; return Opcode; } @@ -1935,18 +1982,46 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool &Invert) { static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, SDValue CmpOp1) { + bool IsFP = CmpOp0.getValueType().isFloatingPoint(); bool Invert = false; SDValue Cmp; - // It doesn't really matter whether we try the inversion or the swap first, - // since there are no cases where both work. - if (unsigned Opcode = getVectorComparisonOrInvert(CC, Invert)) - Cmp = DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); - else { - CC = ISD::getSetCCSwappedOperands(CC); - if (unsigned Opcode = getVectorComparisonOrInvert(CC, Invert)) - Cmp = DAG.getNode(Opcode, DL, VT, CmpOp1, CmpOp0); - else - llvm_unreachable("Unhandled comparison"); + switch (CC) { + // Handle tests for order using (or (ogt y x) (oge x y)). 
+ case ISD::SETUO: + Invert = true; + case ISD::SETO: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); + SDValue GE = DAG.getNode(SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); + break; + } + + // Handle <> tests using (or (ogt y x) (ogt x y)). + case ISD::SETUEQ: + Invert = true; + case ISD::SETONE: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); + SDValue GT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); + break; + } + + // Otherwise a single comparison is enough. It doesn't really + // matter whether we try the inversion or the swap first, since + // there are no cases where both work. + default: + if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) + Cmp = DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); + else { + CC = ISD::getSetCCSwappedOperands(CC); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) + Cmp = DAG.getNode(Opcode, DL, VT, CmpOp1, CmpOp0); + else + llvm_unreachable("Unhandled comparison"); + } + break; } if (Invert) { SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, @@ -3326,6 +3401,46 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) { return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. +static bool isScalarToVector(SDValue Op) { + for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) + if (Op.getOperand(I).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +// Return a vector of type VT that contains Value in the first element. +// The other elements don't matter. 
+static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT, + SDValue Value) { + // If we have a constant, replicate it to all elements and let the + // BUILD_VECTOR lowering take care of it. + if (Value.getOpcode() == ISD::Constant || + Value.getOpcode() == ISD::ConstantFP) { + SmallVector Ops(VT.getVectorNumElements(), Value); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + if (Value.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); +} + +// Return a vector of type VT in which Op0 is in element 0 and Op1 is in +// element 1. Used for cases in which replication is cheap. +static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, + SDValue Op0, SDValue Op1) { + if (Op0.getOpcode() == ISD::UNDEF) { + if (Op1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); + } + if (Op1.getOpcode() == ISD::UNDEF) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); + return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, + buildScalarToVector(DAG, DL, VT, Op0), + buildScalarToVector(DAG, DL, VT, Op1)); +} + // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 // vector for them. static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0, @@ -3502,6 +3617,10 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, if (VT == MVT::v2i64) return joinDwords(DAG, DL, Elems[0], Elems[1]); + // Use a 64-bit merge high to combine two doubles. + if (VT == MVT::v2f64) + return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); + // Collect the constant terms. SmallVector Constants(NumElements, SDValue()); SmallVector Done(NumElements, false); @@ -3614,6 +3733,10 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, if (Res.getNode()) return Res; + // Detect SCALAR_TO_VECTOR conversions. 
+ if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) + return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); + // Otherwise use buildVector to build the vector up from GPRs. unsigned NumElements = Op.getNumOperands(); SmallVector Ops(NumElements); @@ -3664,6 +3787,62 @@ SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); } +SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle insertions of floating-point values. + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + EVT VT = Op.getValueType(); + + // Insertions into constant indices can be done using VPDI. However, + // if the inserted value is a bitcast or a constant then it's better + // to use GPRs, as below. + if (Op1.getOpcode() != ISD::BITCAST && + Op1.getOpcode() != ISD::ConstantFP && + Op2.getOpcode() == ISD::Constant) { + uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Mask = VT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and insert via a GPR. + MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), + DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + +SDValue +SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle extractions of floating-point values. + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + EVT VecVT = Op0.getValueType(); + + // Extractions of constant indices can be done directly.
+ if (auto *CIndexN = dyn_cast(Op1)) { + uint64_t Index = CIndexN->getZExtValue(); + unsigned Mask = VecVT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and extract via a GPR. + MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const { // Look for cases where a vector shift can use the *_BY_SCALAR form. @@ -3808,6 +3987,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerVECTOR_SHUFFLE(Op, DAG); case ISD::SCALAR_TO_VECTOR: return lowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::SHL: return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); case ISD::SRL: @@ -3879,6 +4062,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VICMPE); OPCODE(VICMPH); OPCODE(VICMPHL); + OPCODE(VFCMPE); + OPCODE(VFCMPH); + OPCODE(VFCMPHE); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 4b7d59089462..8319c01fc5e2 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -219,6 +219,13 @@ enum { VICMPH, VICMPHL, + // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 + // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and + // greater than" and VFCMPHE for "ordered and greater than or equal to".
+ VFCMPE, + VFCMPH, + VFCMPHE, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_. // @@ -400,6 +407,8 @@ private: SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index d94725b79134..546974aa5d8f 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -118,18 +118,24 @@ let Predicates = [FeatureVector] in { def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>; def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>; def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>; + def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)), + (VLREPG bdxaddr12only:$addr)>; // Load logical element and zero. def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>; def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>; def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>; def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>; + def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)), + (VLLEZG bdxaddr12only:$addr)>; // Load element. 
def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>; def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>; def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>; def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>; + def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index), + (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; // Gather element. def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>; @@ -152,6 +158,7 @@ defm : ReplicatePeephole; defm : ReplicatePeephole; defm : ReplicatePeephole; defm : ReplicatePeephole; +defm : ReplicatePeephole; //===----------------------------------------------------------------------===// // Stores @@ -172,6 +179,9 @@ let Predicates = [FeatureVector] in { def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>; def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>; def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>; + def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr, + imm32zx1:$index), + (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; // Scatter element. def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>; @@ -188,12 +198,14 @@ let Predicates = [FeatureVector] in { def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>; def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>; def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>; + def : BinaryRRWithType; // Merge low. def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>; def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>; def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>; def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>; + def : BinaryRRWithType; // Permute. 
def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>; @@ -206,6 +218,8 @@ let Predicates = [FeatureVector] in { def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; + def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), + (VREPG VR128:$vec, imm32zx16:$index)>; // Select. def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>; @@ -287,6 +301,7 @@ defm : GenericVectorOps; defm : GenericVectorOps; defm : GenericVectorOps; defm : GenericVectorOps; +defm : GenericVectorOps; //===----------------------------------------------------------------------===// // Integer arithmetic @@ -734,34 +749,52 @@ let Predicates = [FeatureVector] in { // Floating-point arithmetic //===----------------------------------------------------------------------===// +// See comments in SystemZInstrFP.td for the suppression flags and +// rounding modes. +multiclass VectorRounding { + def : FPConversion; + def : FPConversion; + def : FPConversion; + def : FPConversion; + def : FPConversion; + def : FPConversion; +} + let Predicates = [FeatureVector] in { // Add. - def VFADB : BinaryVRRc<"vfadb", 0xE7E3, null_frag, v128db, v128db, 3, 0>; + def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag, v64db, v64db, 3, 8>; // Convert from fixed 64-bit. def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + def : FPConversion; // Convert from logical 64-bit. def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + def : FPConversion; // Convert to fixed 64-bit. 
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion; // Convert to logical 64-bit. def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion; // Divide. - def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, null_frag, v128db, v128db, 3, 0>; + def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag, v64db, v64db, 3, 8>; // Load FP integer. def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>; def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + defm : VectorRounding; // Load lengthened. def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, null_frag, v128db, v128eb, 2, 0>; @@ -772,35 +805,35 @@ let Predicates = [FeatureVector] in { def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>; // Multiply. - def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, null_frag, v128db, v128db, 3, 0>; + def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag, v64db, v64db, 3, 8>; // Multiply and add. - def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, null_frag, v128db, v128db, 0, 3>; + def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag, v64db, v64db, 8, 3>; // Multiply and subtract. 
- def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, null_frag, v128db, v128db, 0, 3>; + def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag, v64db, v64db, 8, 3>; // Load complement, - def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 0>; + def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 0>; // Load negative. - def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 1>; + def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 1>; // Load positive. - def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 2>; + def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 2>; // Square root. - def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, null_frag, v128db, v128db, 3, 0>; + def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag, v64db, v64db, 3, 8>; // Subtract. - def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, null_frag, v128db, v128db, 3, 0>; + def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag, v64db, v64db, 3, 8>; // Test data class immediate. @@ -824,19 +857,19 @@ let Predicates = [FeatureVector] in { def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; // Compare equal. - defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, null_frag, null_frag, + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, null_frag, v128g, v128db, 3, 0>; defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, v64g, v64db, 3, 8>; // Compare high. 
- defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, null_frag, null_frag, + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, null_frag, v128g, v128db, 3, 0>; defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, v64g, v64db, 3, 8>; // Compare high or equal. - defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, null_frag, null_frag, + defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, null_frag, v128g, v128db, 3, 0>; defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, v64g, v64db, 3, 8>; @@ -849,18 +882,27 @@ let Predicates = [FeatureVector] in { def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 
(bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; //===----------------------------------------------------------------------===// // Replicating scalars @@ -880,6 +922,46 @@ def : VectorReplicateScalar; def : Pat<(v2i64 (z_replicate GR64:$scalar)), (VLVGP GR64:$scalar, GR64:$scalar)>; +//===----------------------------------------------------------------------===// +// Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +// Floating-point values are stored in element 0 of the corresponding +// vector register. Scalar to vector conversion is just a subreg and +// scalar replication can just replicate element 0 of the vector register. +multiclass ScalarToVectorFP { + def : Pat<(vt (scalar_to_vector cls:$scalar)), + (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>; + def : Pat<(vt (z_replicate cls:$scalar)), + (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, + subreg), 0)>; +} +defm : ScalarToVectorFP; + +// Match v2f64 insertions. The AddedComplexity counters the 3 added by +// TableGen for the base register operand in VLVG-based integer insertions +// and ensures that this version is strictly better. +let AddedComplexity = 4 in { + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0), + (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_r64), VR128:$vec, 1)>; + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1), + (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_r64), 0)>; +} + +// We extract f64 element X by replicating (for elements other than 0) +// and then taking a high subreg. The AddedComplexity counters the 3 +// added by TableGen for the base register operand in VLGV-based integer +// extractions and ensures that this version is strictly better. 
+let AddedComplexity = 4 in { + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)), + (EXTRACT_SUBREG VR128:$vec, subreg_r64)>; + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)), + (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>; +} + //===----------------------------------------------------------------------===// // String instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 2e431859a860..7cf7d862ffec 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -200,6 +200,9 @@ def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>; def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; +def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; +def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; +def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; class AtomicWOp : SDNode<"SystemZISD::"##name, profile, @@ -468,6 +471,10 @@ def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>; def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3), (add (mul node:$src1, node:$src2), node:$src3)>; +// Fused multiply-subtract, using the natural operand order. +def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src1, node:$src2, (fneg node:$src3))>; + // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. 
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -501,6 +508,7 @@ def z_replicate_loadi8 : z_replicate_load; def z_replicate_loadi16 : z_replicate_load; def z_replicate_loadi32 : z_replicate_load; def z_replicate_loadi64 : z_replicate_load; +def z_replicate_loadf64 : z_replicate_load; // Load a scalar and insert it into a single element of a vector. class z_vle @@ -511,6 +519,7 @@ def z_vlei8 : z_vle; def z_vlei16 : z_vle; def z_vlei32 : z_vle; def z_vlei64 : z_vle; +def z_vlef64 : z_vle; // Load a scalar and insert it into the low element of the high i64 of a // zeroed vector. @@ -523,6 +532,10 @@ def z_vllezi16 : z_vllez; def z_vllezi32 : z_vllez; def z_vllezi64 : PatFrag<(ops node:$addr), (z_join_dwords (i64 (load node:$addr)), (i64 0))>; +def z_vllezf64 : PatFrag<(ops node:$addr), + (z_merge_high + (scalar_to_vector (f64 (load node:$addr))), + (z_vzero))>; // Store one element of a vector. class z_vste @@ -533,6 +546,7 @@ def z_vstei8 : z_vste; def z_vstei16 : z_vste; def z_vstei32 : z_vste; def z_vstei64 : z_vste; +def z_vstef64 : z_vste; // Arithmetic negation on vectors. def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>; diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td index e307f8a888ee..16a7ed784d70 100644 --- a/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -153,3 +153,17 @@ multiclass CompareZeroFP { // The sign of the zero makes no difference. def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; } + +// Use INSN for performing binary operation OPERATOR of type VT +// on registers of class CLS. +class BinaryRRWithType + : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>; + +// Use INSN to perform conversion operation OPERATOR, with the input being +// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions +// and 0 to allow them. MODE is the rounding mode to use.
+class FPConversion suppress, bits<4> mode> + : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))), + (insn tr2.op:$vec, suppress, mode)>; diff --git a/llvm/test/CodeGen/SystemZ/vec-abs-05.ll b/llvm/test/CodeGen/SystemZ/vec-abs-05.ll new file mode 100644 index 000000000000..89142b218544 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-abs-05.ll @@ -0,0 +1,24 @@ +; Test v2f64 absolute. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) + +; Test a plain absolute. +define <2 x double> @f1(<2 x double> %val) { +; CHECK-LABEL: f1: +; CHECK: vflpdb %v24, %v24 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val) + ret <2 x double> %ret +} + +; Test a negative absolute. +define <2 x double> @f2(<2 x double> %val) { +; CHECK-LABEL: f2: +; CHECK: vflndb %v24, %v24 +; CHECK: br %r14 + %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val) + %ret = fsub <2 x double> , %abs + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-add-01.ll b/llvm/test/CodeGen/SystemZ/vec-add-01.ll index a59a8da1cf8e..1de2aa2a1b92 100644 --- a/llvm/test/CodeGen/SystemZ/vec-add-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-add-01.ll @@ -37,3 +37,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { %ret = add <2 x i64> %val1, %val2 ret <2 x i64> %ret } + +; Test a v2f64 addition. +define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfadb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fadd <2 x double> %val1, %val2 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-cmp-06.ll b/llvm/test/CodeGen/SystemZ/vec-cmp-06.ll new file mode 100644 index 000000000000..bdb8744631ad --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-cmp-06.ll @@ -0,0 +1,337 @@ +; Test v2f64 comparisons. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test oeq. 
+define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfcedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp oeq <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test one. +define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f2: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp one <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ogt. +define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f3: +; CHECK: vfchdb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp ogt <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oge. +define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f4: +; CHECK: vfchedb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp oge <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ole. +define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfchedb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = fcmp ole <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test olt. +define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfchdb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = fcmp olt <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ueq. 
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f7: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ueq <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test une. +define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f8: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp une <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ugt. +define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f9: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ugt <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uge. +define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f10: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uge <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ule. +define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f11: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ule <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ult. 
+define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f12: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ult <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test ord. +define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f13: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ord <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test uno. +define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f14: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uno <2 x double> %val1, %val2 + %ret = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %ret +} + +; Test oeq selects. +define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f15: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp oeq <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test one selects. 
+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f16: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp one <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ogt selects. +define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f17: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ogt <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test oge selects. +define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f18: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp oge <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ole selects. +define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f19: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ole <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test olt selects. 
+define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f20: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp olt <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ueq selects. +define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f21: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ueq <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test une selects. +define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f22: +; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp une <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ugt selects. +define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f23: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ugt <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uge selects. 
+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f24: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uge <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ule selects. +define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f25: +; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ule <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ult selects. +define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f26: +; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ult <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test ord selects. +define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f27: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ord <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} + +; Test uno selects. 
+define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2, + <2 x double> %val3, <2 x double> %val4) { +; CHECK-LABEL: f28: +; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uno <2 x double> %val1, %val2 + %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-const-06.ll b/llvm/test/CodeGen/SystemZ/vec-const-06.ll new file mode 100644 index 000000000000..be53a0581ec2 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-const-06.ll @@ -0,0 +1,43 @@ +; Test vector byte masks, v2f64 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test an all-zeros vector. +define <2 x double> @f1() { +; CHECK-LABEL: f1: +; CHECK: vgbm %v24, 0 +; CHECK: br %r14 + ret <2 x double> zeroinitializer +} + +; Test an all-ones vector. +define <2 x double> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgbm %v24, 65535 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a mixed vector (mask 0x8c76). +define <2 x double> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgbm %v24, 35958 +; CHECK: br %r14 + ret <2 x double> +} + +; Test that undefs are treated as zero (mask 0x8c00). +define <2 x double> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgbm %v24, 35840 +; CHECK: br %r14 + ret <2 x double> +} + +; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. +define <2 x double> @f5() { +; CHECK-LABEL: f5: +; CHECK-NOT: vgbm +; CHECK: br %r14 + ret <2 x double> +} diff --git a/llvm/test/CodeGen/SystemZ/vec-const-12.ll b/llvm/test/CodeGen/SystemZ/vec-const-12.ll new file mode 100644 index 000000000000..ca66a3d173eb --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-const-12.ll @@ -0,0 +1,169 @@ +; Test vector replicates, v2f64 version. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test a byte-granularity replicate with the lowest useful value. +define <2 x double> @f1() { +; CHECK-LABEL: f1: +; CHECK: vrepib %v24, 1 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a byte-granularity replicate with an arbitrary value. +define <2 x double> @f2() { +; CHECK-LABEL: f2: +; CHECK: vrepib %v24, -55 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a byte-granularity replicate with the highest useful value. +define <2 x double> @f3() { +; CHECK-LABEL: f3: +; CHECK: vrepib %v24, -2 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a halfword-granularity replicate with the lowest useful value. +define <2 x double> @f4() { +; CHECK-LABEL: f4: +; CHECK: vrepih %v24, 1 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a halfword-granularity replicate with an arbitrary value. +define <2 x double> @f5() { +; CHECK-LABEL: f5: +; CHECK: vrepih %v24, 25650 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a halfword-granularity replicate with the highest useful value. +define <2 x double> @f6() { +; CHECK-LABEL: f6: +; CHECK: vrepih %v24, -2 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate with the lowest useful positive value. +define <2 x double> @f7() { +; CHECK-LABEL: f7: +; CHECK: vrepif %v24, 1 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate with the highest in-range value. +define <2 x double> @f8() { +; CHECK-LABEL: f8: +; CHECK: vrepif %v24, 32767 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate with the next highest value. +; This cannot use VREPIF. +define <2 x double> @f9() { +; CHECK-LABEL: f9: +; CHECK-NOT: vrepif +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate with the lowest in-range value. 
+define <2 x double> @f10() { +; CHECK-LABEL: f10: +; CHECK: vrepif %v24, -32768 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate with the next lowest value. +; This cannot use VREPIF. +define <2 x double> @f11() { +; CHECK-LABEL: f11: +; CHECK-NOT: vrepif +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate with the highest useful negative value. +define <2 x double> @f12() { +; CHECK-LABEL: f12: +; CHECK: vrepif %v24, -2 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with the lowest useful positive +; value. +define <2 x double> @f13() { +; CHECK-LABEL: f13: +; CHECK: vrepig %v24, 1 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with the highest in-range value. +define <2 x double> @f14() { +; CHECK-LABEL: f14: +; CHECK: vrepig %v24, 32767 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with the next highest value. +; This cannot use VREPIG. +define <2 x double> @f15() { +; CHECK-LABEL: f15: +; CHECK-NOT: vrepig +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with the lowest in-range value. +define <2 x double> @f16() { +; CHECK-LABEL: f16: +; CHECK: vrepig %v24, -32768 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with the next lowest value. +; This cannot use VREPIG. +define <2 x double> @f17() { +; CHECK-LABEL: f17: +; CHECK-NOT: vrepig +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with the highest useful negative +; value. +define <2 x double> @f18() { +; CHECK-LABEL: f18: +; CHECK: vrepig %v24, -2 +; CHECK: br %r14 + ret <2 x double> +} + +; Repeat f14 with undefs optimistically treated as 32767. +define <2 x double> @f19() { +; CHECK-LABEL: f19: +; CHECK: vrepig %v24, 32767 +; CHECK: br %r14 + ret <2 x double> +} + +; Repeat f18 with undefs optimistically treated as -2. 
+define <2 x double> @f20() { +; CHECK-LABEL: f20: +; CHECK: vrepig %v24, -2 +; CHECK: br %r14 + ret <2 x double> +} diff --git a/llvm/test/CodeGen/SystemZ/vec-const-18.ll b/llvm/test/CodeGen/SystemZ/vec-const-18.ll new file mode 100644 index 000000000000..c6c20c2a0037 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-const-18.ll @@ -0,0 +1,85 @@ +; Test vector replicates that use VECTOR GENERATE MASK, v2f64 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test a word-granularity replicate with the lowest value that cannot use +; VREPIF. +define <2 x double> @f1() { +; CHECK-LABEL: f1: +; CHECK: vgmf %v24, 16, 16 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate that has the lower 17 bits set. +define <2 x double> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgmf %v24, 15, 31 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate that has the upper 15 bits set. +define <2 x double> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgmf %v24, 0, 14 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate that has middle bits set. +define <2 x double> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgmf %v24, 2, 11 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a word-granularity replicate with a wrap-around mask. +define <2 x double> @f5() { +; CHECK-LABEL: f5: +; CHECK: vgmf %v24, 17, 15 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with the lowest value that cannot +; use VREPIG. +define <2 x double> @f6() { +; CHECK-LABEL: f6: +; CHECK: vgmg %v24, 48, 48 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate that has the lower 22 bits set. +define <2 x double> @f7() { +; CHECK-LABEL: f7: +; CHECK: vgmg %v24, 42, 63 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate that has the upper 45 bits set. 
+define <2 x double> @f8() { +; CHECK-LABEL: f8: +; CHECK: vgmg %v24, 0, 44 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate that has middle bits set. +define <2 x double> @f9() { +; CHECK-LABEL: f9: +; CHECK: vgmg %v24, 2, 11 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a doubleword-granularity replicate with a wrap-around mask. +define <2 x double> @f10() { +; CHECK-LABEL: f10: +; CHECK: vgmg %v24, 10, 0 +; CHECK: br %r14 + ret <2 x double> +} diff --git a/llvm/test/CodeGen/SystemZ/vec-conv-01.ll b/llvm/test/CodeGen/SystemZ/vec-conv-01.ll new file mode 100644 index 000000000000..cbf42c0f533e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-conv-01.ll @@ -0,0 +1,95 @@ +; Test conversions between integer and float elements. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test conversion of f64s to signed i64s. +define <2 x i64> @f1(<2 x double> %doubles) { +; CHECK-LABEL: f1: +; CHECK: vcgdb %v24, %v24, 0, 5 +; CHECK: br %r14 + %dwords = fptosi <2 x double> %doubles to <2 x i64> + ret <2 x i64> %dwords +} + +; Test conversion of f64s to unsigned i64s. +define <2 x i64> @f2(<2 x double> %doubles) { +; CHECK-LABEL: f2: +; CHECK: vclgdb %v24, %v24, 0, 5 +; CHECK: br %r14 + %dwords = fptoui <2 x double> %doubles to <2 x i64> + ret <2 x i64> %dwords +} + +; Test conversion of signed i64s to f64s. +define <2 x double> @f3(<2 x i64> %dwords) { +; CHECK-LABEL: f3: +; CHECK: vcdgb %v24, %v24, 0, 0 +; CHECK: br %r14 + %doubles = sitofp <2 x i64> %dwords to <2 x double> + ret <2 x double> %doubles +} + +; Test conversion of unsigned i64s to f64s. +define <2 x double> @f4(<2 x i64> %dwords) { +; CHECK-LABEL: f4: +; CHECK: vcdlgb %v24, %v24, 0, 0 +; CHECK: br %r14 + %doubles = uitofp <2 x i64> %dwords to <2 x double> + ret <2 x double> %doubles +} + +; Test conversion of f64s to signed i32s, which must compile. 
+define void @f5(<2 x double> %doubles, <2 x i32> *%ptr) { + %words = fptosi <2 x double> %doubles to <2 x i32> + store <2 x i32> %words, <2 x i32> *%ptr + ret void +} + +; Test conversion of f64s to unsigned i32s, which must compile. +define void @f6(<2 x double> %doubles, <2 x i32> *%ptr) { + %words = fptoui <2 x double> %doubles to <2 x i32> + store <2 x i32> %words, <2 x i32> *%ptr + ret void +} + +; Test conversion of signed i32s to f64s, which must compile. +define <2 x double> @f7(<2 x i32> *%ptr) { + %words = load <2 x i32>, <2 x i32> *%ptr + %doubles = sitofp <2 x i32> %words to <2 x double> + ret <2 x double> %doubles +} + +; Test conversion of unsigned i32s to f64s, which must compile. +define <2 x double> @f8(<2 x i32> *%ptr) { + %words = load <2 x i32>, <2 x i32> *%ptr + %doubles = uitofp <2 x i32> %words to <2 x double> + ret <2 x double> %doubles +} + +; Test conversion of f32s to signed i64s, which must compile. +define <2 x i64> @f9(<2 x float> *%ptr) { + %floats = load <2 x float>, <2 x float> *%ptr + %dwords = fptosi <2 x float> %floats to <2 x i64> + ret <2 x i64> %dwords +} + +; Test conversion of f32s to unsigned i64s, which must compile. +define <2 x i64> @f10(<2 x float> *%ptr) { + %floats = load <2 x float>, <2 x float> *%ptr + %dwords = fptoui <2 x float> %floats to <2 x i64> + ret <2 x i64> %dwords +} + +; Test conversion of signed i64s to f32, which must compile. +define void @f11(<2 x i64> %dwords, <2 x float> *%ptr) { + %floats = sitofp <2 x i64> %dwords to <2 x float> + store <2 x float> %floats, <2 x float> *%ptr + ret void +} + +; Test conversion of unsigned i64s to f32, which must compile. 
+define void @f12(<2 x i64> %dwords, <2 x float> *%ptr) { + %floats = uitofp <2 x i64> %dwords to <2 x float> + store <2 x float> %floats, <2 x float> *%ptr + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/vec-div-01.ll b/llvm/test/CodeGen/SystemZ/vec-div-01.ll index 3c5ec4f54ee6..5666444e9da3 100644 --- a/llvm/test/CodeGen/SystemZ/vec-div-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-div-01.ll @@ -1,5 +1,5 @@ -; Test vector division. There is no native support for this, so it's really -; a test of the operation legalization code. +; Test vector division. There is no native integer support for this, +; so the integer cases are really a test of the operation legalization code. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s @@ -60,3 +60,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { %ret = sdiv <2 x i64> %val1, %val2 ret <2 x i64> %ret } + +; Test a v2f64 division. +define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfddb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fdiv <2 x double> %val1, %val2 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-log-01.ll b/llvm/test/CodeGen/SystemZ/vec-log-01.ll new file mode 100644 index 000000000000..f9b7402f08e7 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-log-01.ll @@ -0,0 +1,15 @@ +; Test v2f64 logarithm. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare <2 x double> @llvm.log.v2f64(<2 x double>) + +define <2 x double> @f1(<2 x double> %val) { +; CHECK-LABEL: f1: +; CHECK: brasl %r14, log@PLT +; CHECK: brasl %r14, log@PLT +; CHECK: vmrhg %v24, +; CHECK: br %r14 + %ret = call <2 x double> @llvm.log.v2f64(<2 x double> %val) + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-01.ll b/llvm/test/CodeGen/SystemZ/vec-move-01.ll index 952e5a42126c..f9ae13b3ba11 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-01.ll @@ -33,3 +33,11 @@ define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) { ; CHECK: br %r14 ret <2 x i64> %val2 } + +; Test v2f64 moves. +define <2 x double> @f6(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f6: +; CHECK: vlr %v24, %v26 +; CHECK: br %r14 + ret <2 x double> %val2 +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-02.ll b/llvm/test/CodeGen/SystemZ/vec-move-02.ll index b7b3ab6798d8..a8c6912f0c73 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-02.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-02.ll @@ -38,6 +38,15 @@ define <2 x i64> @f4(<2 x i64> *%ptr) { ret <2 x i64> %ret } +; Test v2f64 loads. +define <2 x double> @f6(<2 x double> *%ptr) { +; CHECK-LABEL: f6: +; CHECK: vl %v24, 0(%r2) +; CHECK: br %r14 + %ret = load <2 x double>, <2 x double> *%ptr + ret <2 x double> %ret +} + ; Test the highest aligned in-range offset. define <16 x i8> @f7(<16 x i8> *%base) { ; CHECK-LABEL: f7: diff --git a/llvm/test/CodeGen/SystemZ/vec-move-03.ll b/llvm/test/CodeGen/SystemZ/vec-move-03.ll index ddce4ef209a5..abd7c939fbed 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-03.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-03.ll @@ -38,6 +38,15 @@ define void @f4(<2 x i64> %val, <2 x i64> *%ptr) { ret void } +; Test v2f64 stores. 
+define void @f6(<2 x double> %val, <2 x double> *%ptr) { +; CHECK-LABEL: f6: +; CHECK: vst %v24, 0(%r2) +; CHECK: br %r14 + store <2 x double> %val, <2 x double> *%ptr + ret void +} + ; Test the highest aligned in-range offset. define void @f7(<16 x i8> %val, <16 x i8> *%base) { ; CHECK-LABEL: f7: diff --git a/llvm/test/CodeGen/SystemZ/vec-move-04.ll b/llvm/test/CodeGen/SystemZ/vec-move-04.ll index f43c0b714912..4e75d21dc961 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-04.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-04.ll @@ -110,6 +110,34 @@ define <2 x i64> @f12(<2 x i64> %val, i64 %element, i32 %index) { ret <2 x i64> %ret } +; Test v2f64 insertion into the first element. +define <2 x double> @f16(<2 x double> %val, double %element) { +; CHECK-LABEL: f16: +; CHECK: vpdi %v24, %v0, %v24, 1 +; CHECK: br %r14 + %ret = insertelement <2 x double> %val, double %element, i32 0 + ret <2 x double> %ret +} + +; Test v2f64 insertion into the last element. +define <2 x double> @f17(<2 x double> %val, double %element) { +; CHECK-LABEL: f17: +; CHECK: vpdi %v24, %v24, %v0, 0 +; CHECK: br %r14 + %ret = insertelement <2 x double> %val, double %element, i32 1 + ret <2 x double> %ret +} + +; Test v2f64 insertion into a variable element. +define <2 x double> @f18(<2 x double> %val, double %element, i32 %index) { +; CHECK-LABEL: f18: +; CHECK: lgdr [[REG:%r[0-5]]], %f0 +; CHECK: vlvgg %v24, [[REG]], 0(%r2) +; CHECK: br %r14 + %ret = insertelement <2 x double> %val, double %element, i32 %index + ret <2 x double> %ret +} + ; Test v16i8 insertion into a variable element plus one. 
define <16 x i8> @f19(<16 x i8> %val, i8 %element, i32 %index) { ; CHECK-LABEL: f19: diff --git a/llvm/test/CodeGen/SystemZ/vec-move-05.ll b/llvm/test/CodeGen/SystemZ/vec-move-05.ll index 60a0666c2f9d..234157a0abb7 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-05.ll @@ -150,6 +150,41 @@ define i64 @f16(<2 x i64> %val, i32 %index) { ret i64 %ret } +; Test v2f64 extraction of the first element. +define double @f23(<2 x double> %val) { +; CHECK-LABEL: f23: +; CHECK: vlr %v0, %v24 +; CHECK: br %r14 + %ret = extractelement <2 x double> %val, i32 0 + ret double %ret +} + +; Test v2f64 extraction of the last element. +define double @f24(<2 x double> %val) { +; CHECK-LABEL: f24: +; CHECK: vrepg %v0, %v24, 1 +; CHECK: br %r14 + %ret = extractelement <2 x double> %val, i32 1 + ret double %ret +} + +; Test v2f64 extractions of an absurd element number. This must compile +; but we don't care what it does. +define double @f25(<2 x double> %val) { + %ret = extractelement <2 x double> %val, i32 100000 + ret double %ret +} + +; Test v2f64 extraction of a variable element. +define double @f26(<2 x double> %val, i32 %index) { +; CHECK-LABEL: f26: +; CHECK: vlgvg [[REG:%r[0-5]]], %v24, 0(%r2) +; CHECK: ldgr %f0, [[REG]] +; CHECK: br %r14 + %ret = extractelement <2 x double> %val, i32 %index + ret double %ret +} + ; Test v16i8 extraction of a variable element with an offset. define i8 @f27(<16 x i8> %val, i32 %index) { ; CHECK-LABEL: f27: diff --git a/llvm/test/CodeGen/SystemZ/vec-move-07.ll b/llvm/test/CodeGen/SystemZ/vec-move-07.ll index a688b089b97c..0cb8a0a1dfc5 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-07.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-07.ll @@ -37,3 +37,12 @@ define <2 x i64> @f4(i64 %val) { %ret = insertelement <2 x i64> undef, i64 %val, i32 0 ret <2 x i64> %ret } + +; Test v2f64, which is just a move. 
+define <2 x double> @f6(double %val) { +; CHECK-LABEL: f6: +; CHECK: vlr %v24, %v0 +; CHECK: br %r14 + %ret = insertelement <2 x double> undef, double %val, i32 0 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-08.ll b/llvm/test/CodeGen/SystemZ/vec-move-08.ll index 94a3b3aefba8..6148529c225d 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-08.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-08.ll @@ -214,6 +214,59 @@ define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) { ret <2 x i64> %ret } +; Test v2f64 insertion into the first element. +define <2 x double> @f26(<2 x double> %val, double *%ptr) { +; CHECK-LABEL: f26: +; CHECK: vleg %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = load double, double *%ptr + %ret = insertelement <2 x double> %val, double %element, i32 0 + ret <2 x double> %ret +} + +; Test v2f64 insertion into the last element. +define <2 x double> @f27(<2 x double> %val, double *%ptr) { +; CHECK-LABEL: f27: +; CHECK: vleg %v24, 0(%r2), 1 +; CHECK: br %r14 + %element = load double, double *%ptr + %ret = insertelement <2 x double> %val, double %element, i32 1 + ret <2 x double> %ret +} + +; Test v2f64 insertion with the highest in-range offset. +define <2 x double> @f28(<2 x double> %val, double *%base) { +; CHECK-LABEL: f28: +; CHECK: vleg %v24, 4088(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i32 511 + %element = load double, double *%ptr + %ret = insertelement <2 x double> %val, double %element, i32 1 + ret <2 x double> %ret +} + +; Test v2f64 insertion with the first out-of-range offset. +define <2 x double> @f29(<2 x double> %val, double *%base) { +; CHECK-LABEL: f29: +; CHECK: aghi %r2, 4096 +; CHECK: vleg %v24, 0(%r2), 0 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i32 512 + %element = load double, double *%ptr + %ret = insertelement <2 x double> %val, double %element, i32 0 + ret <2 x double> %ret +} + +; Test v2f64 insertion into a variable element.
+define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) { +; CHECK-LABEL: f30: +; CHECK-NOT: vleg +; CHECK: br %r14 + %element = load double, double *%ptr + %ret = insertelement <2 x double> %val, double %element, i32 %index + ret <2 x double> %ret +} + ; Test a v4i32 gather of the first element. define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) { ; CHECK-LABEL: f31: @@ -282,3 +335,29 @@ define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) { %ret = insertelement <2 x i64> %val, i64 %element, i32 1 ret <2 x i64> %ret } + +; Test a v2f64 gather of the first element. +define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) { +; CHECK-LABEL: f38: +; CHECK: vgeg %v24, 0(%v26,%r2), 0 +; CHECK: br %r14 + %elem = extractelement <2 x i64> %index, i32 0 + %add = add i64 %base, %elem + %ptr = inttoptr i64 %add to double * + %element = load double, double *%ptr + %ret = insertelement <2 x double> %val, double %element, i32 0 + ret <2 x double> %ret +} + +; Test a v2f64 gather of the last element. +define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) { +; CHECK-LABEL: f39: +; CHECK: vgeg %v24, 0(%v26,%r2), 1 +; CHECK: br %r14 + %elem = extractelement <2 x i64> %index, i32 1 + %add = add i64 %base, %elem + %ptr = inttoptr i64 %add to double * + %element = load double, double *%ptr + %ret = insertelement <2 x double> %val, double %element, i32 1 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-09.ll b/llvm/test/CodeGen/SystemZ/vec-move-09.ll index 7863e4305f94..78c5454fb551 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-09.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-09.ll @@ -235,3 +235,30 @@ define <2 x i64> @f26(<2 x i64> %val, i32 %index) { %ret = insertelement <2 x i64> %val, i64 0, i32 %index ret <2 x i64> %ret } + +; Test v2f64 insertion of 0 into the first element. 
+define <2 x double> @f30(<2 x double> %val) { +; CHECK-LABEL: f30: +; CHECK: vleig %v24, 0, 0 +; CHECK: br %r14 + %ret = insertelement <2 x double> %val, double 0.0, i32 0 + ret <2 x double> %ret +} + +; Test v2f64 insertion of 0 into the last element. +define <2 x double> @f31(<2 x double> %val) { +; CHECK-LABEL: f31: +; CHECK: vleig %v24, 0, 1 +; CHECK: br %r14 + %ret = insertelement <2 x double> %val, double 0.0, i32 1 + ret <2 x double> %ret +} + +; Test v2f64 insertion of a nonzero value. +define <2 x double> @f32(<2 x double> %val) { +; CHECK-LABEL: f32: +; CHECK-NOT: vleig +; CHECK: br %r14 + %ret = insertelement <2 x double> %val, double 1.0, i32 1 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-10.ll b/llvm/test/CodeGen/SystemZ/vec-move-10.ll index 852a4a7c4ed5..bc854214bbd4 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-10.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-10.ll @@ -258,6 +258,59 @@ define void @f24(<2 x i64> %val, i64 *%ptr, i32 %index) { ret void } +; Test v2f64 extraction from the first element. +define void @f32(<2 x double> %val, double *%ptr) { +; CHECK-LABEL: f32: +; CHECK: vsteg %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = extractelement <2 x double> %val, i32 0 + store double %element, double *%ptr + ret void +} + +; Test v2f64 extraction from the last element. +define void @f33(<2 x double> %val, double *%ptr) { +; CHECK-LABEL: f33: +; CHECK: vsteg %v24, 0(%r2), 1 +; CHECK: br %r14 + %element = extractelement <2 x double> %val, i32 1 + store double %element, double *%ptr + ret void +} + +; Test v2f64 extraction with the highest in-range offset. +define void @f34(<2 x double> %val, double *%base) { +; CHECK-LABEL: f34: +; CHECK: vsteg %v24, 4088(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i32 511 + %element = extractelement <2 x double> %val, i32 1 + store double %element, double *%ptr + ret void +} + +; Test v2f64 extraction with the first out-of-range offset.
+define void @f35(<2 x double> %val, double *%base) { +; CHECK-LABEL: f35: +; CHECK: aghi %r2, 4096 +; CHECK: vsteg %v24, 0(%r2), 0 +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i32 512 + %element = extractelement <2 x double> %val, i32 0 + store double %element, double *%ptr + ret void +} + +; Test v2f64 extraction from a variable element. +define void @f36(<2 x double> %val, double *%ptr, i32 %index) { +; CHECK-LABEL: f36: +; CHECK-NOT: vsteg +; CHECK: br %r14 + %element = extractelement <2 x double> %val, i32 %index + store double %element, double *%ptr + ret void +} + ; Test a v4i32 scatter of the first element. define void @f37(<4 x i32> %val, <4 x i32> %index, i64 %base) { ; CHECK-LABEL: f37: @@ -326,3 +379,29 @@ define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base) { store i64 %element, i64 *%ptr ret void } + +; Test a v2f64 scatter of the first element. +define void @f44(<2 x double> %val, <2 x i64> %index, i64 %base) { +; CHECK-LABEL: f44: +; CHECK: vsceg %v24, 0(%v26,%r2), 0 +; CHECK: br %r14 + %elem = extractelement <2 x i64> %index, i32 0 + %add = add i64 %base, %elem + %ptr = inttoptr i64 %add to double * + %element = extractelement <2 x double> %val, i32 0 + store double %element, double *%ptr + ret void +} + +; Test a v2f64 scatter of the last element. 
+define void @f45(<2 x double> %val, <2 x i64> %index, i64 %base) { +; CHECK-LABEL: f45: +; CHECK: vsceg %v24, 0(%v26,%r2), 1 +; CHECK: br %r14 + %elem = extractelement <2 x i64> %index, i32 1 + %add = add i64 %base, %elem + %ptr = inttoptr i64 %add to double * + %element = extractelement <2 x double> %val, i32 1 + store double %element, double *%ptr + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-11.ll b/llvm/test/CodeGen/SystemZ/vec-move-11.ll index 45bc91b169ba..07a037ccdf25 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-11.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-11.ll @@ -91,3 +91,12 @@ define <2 x i64> @f10(i64 %val) { %ret = insertelement <2 x i64> undef, i64 %val, i32 1 ret <2 x i64> %ret } + +; Test v2f64 insertion into an undef. +define <2 x double> @f12(double %val) { +; CHECK-LABEL: f12: +; CHECK: vrepg %v24, %v0, 0 +; CHECK: br %r14 + %ret = insertelement <2 x double> undef, double %val, i32 1 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-12.ll b/llvm/test/CodeGen/SystemZ/vec-move-12.ll index 1fecab688e72..94b186f46e57 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-12.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-12.ll @@ -101,3 +101,13 @@ define <2 x i64> @f10(i64 *%ptr) { %ret = insertelement <2 x i64> undef, i64 %val, i32 1 ret <2 x i64> %ret } + +; Test v2f64 insertion into an undef. 
+define <2 x double> @f12(double *%ptr) { +; CHECK-LABEL: f12: +; CHECK: vlrepg %v24, 0(%r2) +; CHECK: br %r14 + %val = load double, double *%ptr + %ret = insertelement <2 x double> undef, double %val, i32 1 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-13.ll b/llvm/test/CodeGen/SystemZ/vec-move-13.ll index e103affa4b14..c50c94afb6cf 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-13.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-13.ll @@ -45,3 +45,13 @@ define <2 x i64> @f4(i64 %val) { %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 1 ret <2 x i64> %ret } + +; Test v2f64 insertion into 0. +define <2 x double> @f6(double %val) { +; CHECK-LABEL: f6: +; CHECK: vgbm [[REG:%v[0-9]+]], 0 +; CHECK: vmrhg %v24, [[REG]], %v0 +; CHECK: br %r14 + %ret = insertelement <2 x double> zeroinitializer, double %val, i32 1 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-move-14.ll b/llvm/test/CodeGen/SystemZ/vec-move-14.ll index f0c60e7d3662..b48f2175ebea 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-14.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-14.ll @@ -74,3 +74,13 @@ define <2 x i64> @f7(i64 *%ptr) { %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0 ret <2 x i64> %ret } + +; Test VLLEZG with a double. +define <2 x double> @f9(double *%ptr) { +; CHECK-LABEL: f9: +; CHECK: vllezg %v24, 0(%r2) +; CHECK: br %r14 + %val = load double, double *%ptr + %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-01.ll b/llvm/test/CodeGen/SystemZ/vec-mul-01.ll index 209582f5893b..d0018fa1f8c5 100644 --- a/llvm/test/CodeGen/SystemZ/vec-mul-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-mul-01.ll @@ -37,3 +37,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { %ret = mul <2 x i64> %val1, %val2 ret <2 x i64> %ret } + +; Test a v2f64 multiplication. 
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfmdb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fmul <2 x double> %val1, %val2 + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-02.ll b/llvm/test/CodeGen/SystemZ/vec-mul-02.ll index 7323330919a6..11a651e49975 100644 --- a/llvm/test/CodeGen/SystemZ/vec-mul-02.ll +++ b/llvm/test/CodeGen/SystemZ/vec-mul-02.ll @@ -2,6 +2,8 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) + ; Test a v16i8 multiply-and-add. define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) { @@ -34,3 +36,28 @@ define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2, %ret = add <4 x i32> %mul, %val3 ret <4 x i32> %ret } + +; Test a v2f64 multiply-and-add. +define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2, <2 x double> %val3) { +; CHECK-LABEL: f4: +; CHECK: vfmadb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1, + <2 x double> %val2, + <2 x double> %val3) + ret <2 x double> %ret +} + +; Test a v2f64 multiply-and-subtract. 
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2, <2 x double> %val3) { +; CHECK-LABEL: f5: +; CHECK: vfmsdb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3 + %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1, + <2 x double> %val2, + <2 x double> %negval3) + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-neg-01.ll b/llvm/test/CodeGen/SystemZ/vec-neg-01.ll index 357648ba4d3a..491e24bb34f6 100644 --- a/llvm/test/CodeGen/SystemZ/vec-neg-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-neg-01.ll @@ -37,3 +37,12 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val) { %ret = sub <2 x i64> zeroinitializer, %val ret <2 x i64> %ret } + +; Test a v2f64 negation. +define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) { +; CHECK-LABEL: f5: +; CHECK: vflcdb %v24, %v26 +; CHECK: br %r14 + %ret = fsub <2 x double> <double -0.0, double -0.0>, %val + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-01.ll b/llvm/test/CodeGen/SystemZ/vec-perm-01.ll index 520ff45e7f70..c68958a98a25 100644 --- a/llvm/test/CodeGen/SystemZ/vec-perm-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-perm-01.ll @@ -122,3 +122,23 @@ define <2 x i64> @f11(<2 x i64> %val) { <2 x i32> <i32 1, i32 1> ret <2 x i64> %ret } + +; Test v2f64 splat of the first element. +define <2 x double> @f15(<2 x double> %val) { +; CHECK-LABEL: f15: +; CHECK: vrepg %v24, %v24, 0 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val, <2 x double> undef, + <2 x i32> zeroinitializer + ret <2 x double> %ret +} + +; Test v2f64 splat of the last element.
+define <2 x double> @f16(<2 x double> %val) { +; CHECK-LABEL: f16: +; CHECK: vrepg %v24, %v24, 1 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val, <2 x double> undef, + <2 x i32> <i32 1, i32 1> + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-02.ll b/llvm/test/CodeGen/SystemZ/vec-perm-02.ll index 93e4112c0efc..7158990174bd 100644 --- a/llvm/test/CodeGen/SystemZ/vec-perm-02.ll +++ b/llvm/test/CodeGen/SystemZ/vec-perm-02.ll @@ -142,3 +142,25 @@ define <2 x i64> @f11(i64 %scalar) { <2 x i32> <i32 1, i32 1> ret <2 x i64> %ret } + +; Test v2f64 splat of the first element. +define <2 x double> @f15(double %scalar) { +; CHECK-LABEL: f15: +; CHECK: vrepg %v24, %v0, 0 +; CHECK: br %r14 + %val = insertelement <2 x double> undef, double %scalar, i32 0 + %ret = shufflevector <2 x double> %val, <2 x double> undef, + <2 x i32> zeroinitializer + ret <2 x double> %ret +} + +; Test v2f64 splat of the last element. +define <2 x double> @f16(double %scalar) { +; CHECK-LABEL: f16: +; CHECK: vrepg %v24, %v0, 0 +; CHECK: br %r14 + %val = insertelement <2 x double> undef, double %scalar, i32 1 + %ret = shufflevector <2 x double> %val, <2 x double> undef, + <2 x i32> <i32 1, i32 1> + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-03.ll b/llvm/test/CodeGen/SystemZ/vec-perm-03.ll index d74948bdb516..c30a87601a43 100644 --- a/llvm/test/CodeGen/SystemZ/vec-perm-03.ll +++ b/llvm/test/CodeGen/SystemZ/vec-perm-03.ll @@ -158,6 +158,46 @@ define <2 x i64> @f12(i64 *%base) { ret <2 x i64> %ret } + +; Test a v2f64 replicating load with no offset. +define <2 x double> @f16(double *%ptr) { +; CHECK-LABEL: f16: +; CHECK: vlrepg %v24, 0(%r2) +; CHECK: br %r14 + %scalar = load double, double *%ptr + %val = insertelement <2 x double> undef, double %scalar, i32 0 + %ret = shufflevector <2 x double> %val, <2 x double> undef, + <2 x i32> zeroinitializer + ret <2 x double> %ret +} + +; Test a v2f64 replicating load with the maximum in-range offset.
+define <2 x double> @f17(double *%base) { +; CHECK-LABEL: f17: +; CHECK: vlrepg %v24, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i32 511 + %scalar = load double, double *%ptr + %val = insertelement <2 x double> undef, double %scalar, i32 0 + %ret = shufflevector <2 x double> %val, <2 x double> undef, + <2 x i32> zeroinitializer + ret <2 x double> %ret +} + +; Test a v2f64 replicating load with the first out-of-range offset. +define <2 x double> @f18(double *%base) { +; CHECK-LABEL: f18: +; CHECK: aghi %r2, 4096 +; CHECK: vlrepg %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr double, double *%base, i32 512 + %scalar = load double, double *%ptr + %val = insertelement <2 x double> undef, double %scalar, i32 0 + %ret = shufflevector <2 x double> %val, <2 x double> undef, + <2 x i32> zeroinitializer + ret <2 x double> %ret +} + ; Test a v16i8 replicating load with an index. define <16 x i8> @f19(i8 *%base, i64 %index) { ; CHECK-LABEL: f19: diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-04.ll b/llvm/test/CodeGen/SystemZ/vec-perm-04.ll index 1d449b9bb343..ca04fdf69132 100644 --- a/llvm/test/CodeGen/SystemZ/vec-perm-04.ll +++ b/llvm/test/CodeGen/SystemZ/vec-perm-04.ll @@ -158,3 +158,23 @@ define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) { <2 x i32> <i32 2, i32 0> ret <2 x i64> %ret } + +; Test a canonical v2f64 merge high. +define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f16: +; CHECK: vmrhg %v24, %v24, %v26 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val1, <2 x double> %val2, + <2 x i32> <i32 0, i32 2> + ret <2 x double> %ret +} + +; Test a reversed v2f64 merge high.
+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f17: +; CHECK: vmrhg %v24, %v26, %v24 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val1, <2 x double> %val2, + <2 x i32> <i32 2, i32 0> + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-05.ll b/llvm/test/CodeGen/SystemZ/vec-perm-05.ll index 636228c56baf..f4a46ff4e279 100644 --- a/llvm/test/CodeGen/SystemZ/vec-perm-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-perm-05.ll @@ -158,3 +158,23 @@ define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) { <2 x i32> <i32 3, i32 1> ret <2 x i64> %ret } + +; Test a canonical v2f64 merge low. +define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f16: +; CHECK: vmrlg %v24, %v24, %v26 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val1, <2 x double> %val2, + <2 x i32> <i32 1, i32 3> + ret <2 x double> %ret +} + +; Test a reversed v2f64 merge low. +define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f17: +; CHECK: vmrlg %v24, %v26, %v24 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val1, <2 x double> %val2, + <2 x i32> <i32 3, i32 1> + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-08.ll b/llvm/test/CodeGen/SystemZ/vec-perm-08.ll index 4d06377f5a3b..b5220ab67126 100644 --- a/llvm/test/CodeGen/SystemZ/vec-perm-08.ll +++ b/llvm/test/CodeGen/SystemZ/vec-perm-08.ll @@ -128,3 +128,23 @@ define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2) { <2 x i32> <i32 3, i32 0> ret <2 x i64> %ret } + +; Test a high1/low2 permute for v2f64. +define <2 x double> @f14(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f14: +; CHECK: vpdi %v24, %v24, %v26, 1 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val1, <2 x double> %val2, + <2 x i32> <i32 0, i32 3> + ret <2 x double> %ret +} + +; Test a low2/high1 permute for v2f64.
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2) { +; CHECK-LABEL: f15: +; CHECK: vpdi %v24, %v26, %v24, 4 +; CHECK: br %r14 + %ret = shufflevector <2 x double> %val1, <2 x double> %val2, + <2 x i32> <i32 3, i32 0> + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-round-01.ll b/llvm/test/CodeGen/SystemZ/vec-round-01.ll new file mode 100644 index 000000000000..284b83e96f72 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-round-01.ll @@ -0,0 +1,58 @@ +; Test v2f64 rounding. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare <2 x double> @llvm.rint.v2f64(<2 x double>) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <2 x double> @llvm.round.v2f64(<2 x double>) + +define <2 x double> @f1(<2 x double> %val) { +; CHECK-LABEL: f1: +; CHECK: vfidb %v24, %v24, 0, 0 +; CHECK: br %r14 + %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %val) + ret <2 x double> %res +} + +define <2 x double> @f2(<2 x double> %val) { +; CHECK-LABEL: f2: +; CHECK: vfidb %v24, %v24, 4, 0 +; CHECK: br %r14 + %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %val) + ret <2 x double> %res +} + +define <2 x double> @f3(<2 x double> %val) { +; CHECK-LABEL: f3: +; CHECK: vfidb %v24, %v24, 4, 7 +; CHECK: br %r14 + %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %val) + ret <2 x double> %res +} + +define <2 x double> @f4(<2 x double> %val) { +; CHECK-LABEL: f4: +; CHECK: vfidb %v24, %v24, 4, 6 +; CHECK: br %r14 + %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %val) + ret <2 x double> %res +} + +define <2 x double> @f5(<2 x double> %val) { +; CHECK-LABEL: f5: +; CHECK: vfidb %v24, %v24, 4, 5 +; CHECK: br %r14 + %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %val) + ret <2 x double> %res +} + +define <2 x double> @f6(<2 x double> %val) {
+; CHECK-LABEL: f6: +; CHECK: vfidb %v24, %v24, 4, 1 +; CHECK: br %r14 + %res = call <2 x double> @llvm.round.v2f64(<2 x double> %val) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/SystemZ/vec-sqrt-01.ll b/llvm/test/CodeGen/SystemZ/vec-sqrt-01.ll new file mode 100644 index 000000000000..0160c24a749c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-sqrt-01.ll @@ -0,0 +1,13 @@ +; Test v2f64 square root. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) + +define <2 x double> @f1(<2 x double> %val) { +; CHECK-LABEL: f1: +; CHECK: vfsqdb %v24, %v24 +; CHECK: br %r14 + %ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val) + ret <2 x double> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll index 9e5b4f81e6d1..24d4ba5a2bdc 100644 --- a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll @@ -37,3 +37,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { %ret = sub <2 x i64> %val1, %val2 ret <2 x i64> %ret } + +; Test a v2f64 subtraction. +define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1, + <2 x double> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfsdb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fsub <2 x double> %val1, %val2 + ret <2 x double> %ret +}