diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e2dc29c7369e..179f600d4964 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -854,28 +854,18 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { SDOperand Flag; // Copy the result values into the output registers. - if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() || - RVLocs[0].getLocReg() != X86::ST0) { - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), - Flag); - Flag = Chain.getValue(1); - } - } else { - // We need to handle a destination of ST0 specially, because it isn't really - // a register. - SDOperand Value = Op.getOperand(1); + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDOperand ValToCopy = Op.getOperand(i*2+1); - // an XMM register onto the fp-stack. Do this with an FP_EXTEND to f80. - // This will get legalized into a load/store if it can't get optimized away. - if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) - Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value); + // If this is a copy from an xmm register to ST(0), use an FPExtend to + // change the value to the FP stack register class. 
+ if (RVLocs[i].getLocReg() == X86::ST0 && + isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) + ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDOperand Ops[] = { Chain, Value }; - Chain = DAG.getNode(X86ISD::FP_SET_ST0, Tys, Ops, 2); + Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); } @@ -905,37 +895,31 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, SmallVector ResultVals; // Copy all of the result registers out of their specified physreg. - if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) { - for (unsigned i = 0; i != RVLocs.size(); ++i) { - Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(), - RVLocs[i].getValVT(), InFlag).getValue(1); - InFlag = Chain.getValue(2); - ResultVals.push_back(Chain.getValue(0)); + for (unsigned i = 0; i != RVLocs.size(); ++i) { + MVT::ValueType CopyVT = RVLocs[i].getValVT(); + + // If this is a call to a function that returns an fp value on the floating + // point stack, but where we prefer to use the value in xmm registers, copy + // it out as F80 and use a truncate to move it from fp stack reg to xmm reg. + if (RVLocs[i].getLocReg() == X86::ST0 && + isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) { + CopyVT = MVT::f80; } - } else { - // Copies from the FP stack are special, as ST0 isn't a valid register - // before the fp stackifier runs. - // Copy ST0 into an RFP register with FP_GET_RESULT. If this will end up - // in an SSE register, copy it out as F80 and do a truncate, otherwise use - // the specified value type. 
- MVT::ValueType GetResultTy = RVLocs[0].getValVT(); - if (isScalarFPTypeInSSEReg(GetResultTy)) - GetResultTy = MVT::f80; - SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag); - SDOperand GROps[] = { Chain, InFlag }; - SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0, Tys, GROps, 2); - Chain = RetVal.getValue(1); - InFlag = RetVal.getValue(2); + Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(), + CopyVT, InFlag).getValue(1); + SDOperand Val = Chain.getValue(0); + InFlag = Chain.getValue(2); - // If we want the result in an SSE register, use an FP_TRUNCATE to get it - // there. - if (GetResultTy != RVLocs[0].getValVT()) - RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal, - // This truncation won't change the value. - DAG.getIntPtrConstant(1)); + if (CopyVT != RVLocs[i].getValVT()) { + // Round the F80 to the right size, which also moves it to the + // appropriate xmm register. + Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val, + // This truncation won't change the value. + DAG.getIntPtrConstant(1)); + } - ResultVals.push_back(RetVal); + ResultVals.push_back(Val); } // Merge everything together with a MERGE_VALUES node. 
@@ -5573,9 +5557,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; case X86ISD::FLD: return "X86ISD::FLD"; case X86ISD::FST: return "X86ISD::FST"; - case X86ISD::FP_GET_ST0: return "X86ISD::FP_GET_ST0"; case X86ISD::FP_GET_ST0_ST1: return "X86ISD::FP_GET_ST0_ST1"; - case X86ISD::FP_SET_ST0: return "X86ISD::FP_SET_ST0"; case X86ISD::CALL: return "X86ISD::CALL"; case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index cb8d94d73cb1..fef9be312667 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -84,20 +84,10 @@ namespace llvm { /// as. FST, - /// FP_GET_ST0 - This corresponds to FpGET_ST0 pseudo instruction - /// which copies from ST(0) to the destination. It takes a chain and - /// writes a RFP result and a chain. - FP_GET_ST0, - - /// FP_GET_ST0_ST1 - Same as FP_GET_ST0 except it copies two values - /// ST(0) and ST(1). + /// FP_GET_ST0_ST1 - Copies two values, from ST(0) and ST(1), to the + /// destinations. FP_GET_ST0_ST1, - /// FP_SET_ST0 - This corresponds to FpSET_ST0 pseudo instruction - /// which copies the source operand to ST(0). It takes a chain+value and - /// returns a chain and a flag. - FP_SET_ST0, - /// CALL/TAILCALL - These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 479aa183a007..d37ecf80899d 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -17,10 +17,8 @@ // FPStack specific DAG Nodes. 
//===----------------------------------------------------------------------===// -def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>; def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>, SDTCisVT<1, f80>]>; -def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; @@ -33,10 +31,6 @@ def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def X86fpget_st0 : SDNode<"X86ISD::FP_GET_ST0", SDTX86FpGet, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; -def X86fpset : SDNode<"X86ISD::FP_SET_ST0", SDTX86FpSet, - [SDNPHasChain, SDNPOutFlag]>; def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain, SDNPMayLoad]>; def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, @@ -138,26 +132,18 @@ let isTerminator = 1 in // encoding and asm printing info). // Pseudo Instructions for FP stack return values. -def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, - [(set RFP32:$dst, X86fpget_st0)]>; // FPR = ST(0) -def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, - [(set RFP64:$dst, X86fpget_st0)]>; // FPR = ST(0) -def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, - [(set RFP80:$dst, X86fpget_st0)]>; // FPR = ST(0) +def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0) +def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0) +def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0) def FpGET_ST0_ST1 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP, []>; // FPR = ST(0), FPR = ST(1) let Defs = [ST0] in { -def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, - [(X86fpset RFP32:$src)]>;// ST(0) = FPR - -def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, - [(X86fpset RFP64:$src)]>;// ST(0) = FPR - -def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, - [(X86fpset RFP80:$src)]>;// ST(0) = FPR 
+def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR +def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR +def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR } // FpIf32, FpIf64 - Floating Point Psuedo Instruction template. diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index cb376c03cb81..93d2a510fbc4 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -481,14 +481,14 @@ def FR64 : RegisterClass<"X86", [f64], 64, // faster on common hardware. In reality, this should be controlled by a // command line option or something. -def RFP32 : RegisterClass<"X86", [f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; -def RFP64 : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; -def RFP80 : RegisterClass<"X86", [f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; // Floating point stack registers (these are not allocatable by the // register allocator - the floating point stackifier is responsible // for transforming FPn allocations to STn registers) -def RST : RegisterClass<"X86", [f80], 32, +def RST : RegisterClass<"X86", [f80, f64, f32], 32, [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> { let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const;