Eliminate the FP_GET_ST0/FP_SET_ST0 target-specific dag nodes, just lower to

copyfromreg/copytoreg instead.

llvm-svn: 48174
This commit is contained in:
Chris Lattner 2008-03-10 21:08:41 +00:00
parent d6cff517d6
commit 4b3a7fa823
4 changed files with 41 additions and 83 deletions

View File

@ -854,28 +854,18 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
SDOperand Flag;
// Copy the result values into the output registers.
if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
RVLocs[0].getLocReg() != X86::ST0) {
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
Flag);
Flag = Chain.getValue(1);
}
} else {
// We need to handle a destination of ST0 specially, because it isn't really
// a register.
SDOperand Value = Op.getOperand(1);
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDOperand ValToCopy = Op.getOperand(i*2+1);
// an XMM register onto the fp-stack. Do this with an FP_EXTEND to f80.
// This will get legalized into a load/store if it can't get optimized away.
if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (RVLocs[i].getLocReg() == X86::ST0 &&
isScalarFPTypeInSSEReg(RVLocs[i].getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand Ops[] = { Chain, Value };
Chain = DAG.getNode(X86ISD::FP_SET_ST0, Tys, Ops, 2);
Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
@ -905,37 +895,31 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
SmallVector<SDOperand, 8> ResultVals;
// Copy all of the result registers out of their specified physreg.
if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
for (unsigned i = 0; i != RVLocs.size(); ++i) {
Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
RVLocs[i].getValVT(), InFlag).getValue(1);
InFlag = Chain.getValue(2);
ResultVals.push_back(Chain.getValue(0));
for (unsigned i = 0; i != RVLocs.size(); ++i) {
MVT::ValueType CopyVT = RVLocs[i].getValVT();
// If this is a call to a function that returns an fp value on the floating
// point stack, but where we prefer to use the value in xmm registers, copy
// it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
if (RVLocs[i].getLocReg() == X86::ST0 &&
isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
CopyVT = MVT::f80;
}
} else {
// Copies from the FP stack are special, as ST0 isn't a valid register
// before the fp stackifier runs.
// Copy ST0 into an RFP register with FP_GET_RESULT. If this will end up
// in an SSE register, copy it out as F80 and do a truncate, otherwise use
// the specified value type.
MVT::ValueType GetResultTy = RVLocs[0].getValVT();
if (isScalarFPTypeInSSEReg(GetResultTy))
GetResultTy = MVT::f80;
SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
SDOperand GROps[] = { Chain, InFlag };
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0, Tys, GROps, 2);
Chain = RetVal.getValue(1);
InFlag = RetVal.getValue(2);
Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
CopyVT, InFlag).getValue(1);
SDOperand Val = Chain.getValue(0);
InFlag = Chain.getValue(2);
// If we want the result in an SSE register, use an FP_TRUNCATE to get it
// there.
if (GetResultTy != RVLocs[0].getValVT())
RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
if (CopyVT != RVLocs[i].getValVT()) {
// Round the F80 to the right size, which also moves it to the appropriate
// xmm register.
Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
}
ResultVals.push_back(RetVal);
ResultVals.push_back(Val);
}
// Merge everything together with a MERGE_VALUES node.
@ -5573,9 +5557,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::FP_GET_ST0: return "X86ISD::FP_GET_ST0";
case X86ISD::FP_GET_ST0_ST1: return "X86ISD::FP_GET_ST0_ST1";
case X86ISD::FP_SET_ST0: return "X86ISD::FP_SET_ST0";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";

View File

@ -84,20 +84,10 @@ namespace llvm {
/// as.
FST,
/// FP_GET_ST0 - This corresponds to FpGET_ST0 pseudo instruction
/// which copies from ST(0) to the destination. It takes a chain and
/// writes a RFP result and a chain.
FP_GET_ST0,
/// FP_GET_ST0_ST1 - Same as FP_GET_ST0 except it copies two values
/// ST(0) and ST(1).
FP_GET_ST0_ST1,
/// FP_SET_ST0 - This corresponds to FpSET_ST0 pseudo instruction
/// which copies the source operand to ST(0). It takes a chain+value and
/// returns a chain and a flag.
FP_SET_ST0,
/// CALL/TAILCALL - These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these nodes are:

View File

@ -17,10 +17,8 @@
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
SDTCisVT<1, f80>]>;
def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
@ -33,10 +31,6 @@ def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fpget_st0 : SDNode<"X86ISD::FP_GET_ST0", SDTX86FpGet,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
def X86fpset : SDNode<"X86ISD::FP_SET_ST0", SDTX86FpSet,
[SDNPHasChain, SDNPOutFlag]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
@ -138,26 +132,18 @@ let isTerminator = 1 in
// encoding and asm printing info).
// Pseudo Instructions for FP stack return values.
def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP,
[(set RFP32:$dst, X86fpget_st0)]>; // FPR = ST(0)
def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP,
[(set RFP64:$dst, X86fpget_st0)]>; // FPR = ST(0)
def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP,
[(set RFP80:$dst, X86fpget_st0)]>; // FPR = ST(0)
def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
def FpGET_ST0_ST1 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP,
[]>; // FPR = ST(0), FPR = ST(1)
let Defs = [ST0] in {
def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP,
[(X86fpset RFP32:$src)]>;// ST(0) = FPR
def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP,
[(X86fpset RFP64:$src)]>;// ST(0) = FPR
def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP,
[(X86fpset RFP80:$src)]>;// ST(0) = FPR
def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR
def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR
def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR
}
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.

View File

@ -481,14 +481,14 @@ def FR64 : RegisterClass<"X86", [f64], 64,
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
def RFP32 : RegisterClass<"X86", [f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
def RFP64 : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
def RFP80 : RegisterClass<"X86", [f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80], 32,
def RST : RegisterClass<"X86", [f80, f64, f32], 32,
[ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;