Rename FpGETRESULT32 -> FpGET_ST0_32 etc. Add support for
isel'ing value-preserving FP roundings from one FP stack reg to another into a noop, instead of generating stack traffic. llvm-svn: 48093
This commit is contained in:
parent
a6ce71fb84
commit
d587e580a6
|
@ -214,7 +214,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
|
||||||
|
|
||||||
MachineInstr *PrevMI = 0;
|
MachineInstr *PrevMI = 0;
|
||||||
if (I != BB.begin())
|
if (I != BB.begin())
|
||||||
PrevMI = prior(I);
|
PrevMI = prior(I);
|
||||||
|
|
||||||
++NumFP; // Keep track of # of pseudo instrs
|
++NumFP; // Keep track of # of pseudo instrs
|
||||||
DOUT << "\nFPInst:\t" << *MI;
|
DOUT << "\nFPInst:\t" << *MI;
|
||||||
|
@ -917,13 +917,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
|
||||||
MachineInstr *MI = I;
|
MachineInstr *MI = I;
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: assert(0 && "Unknown SpecialFP instruction!");
|
default: assert(0 && "Unknown SpecialFP instruction!");
|
||||||
case X86::FpGETRESULT32: // Appears immediately after a call returning FP type!
|
case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
|
||||||
case X86::FpGETRESULT64: // Appears immediately after a call returning FP type!
|
case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
|
||||||
case X86::FpGETRESULT80:
|
case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
|
||||||
assert(StackTop == 0 && "Stack should be empty after a call!");
|
assert(StackTop == 0 && "Stack should be empty after a call!");
|
||||||
pushReg(getFPReg(MI->getOperand(0)));
|
pushReg(getFPReg(MI->getOperand(0)));
|
||||||
break;
|
break;
|
||||||
case X86::FpGETRESULT80x2:
|
case X86::FpGET_ST0_ST1:
|
||||||
assert(StackTop == 0 && "Stack should be empty after a call!");
|
assert(StackTop == 0 && "Stack should be empty after a call!");
|
||||||
pushReg(getFPReg(MI->getOperand(0)));
|
pushReg(getFPReg(MI->getOperand(0)));
|
||||||
pushReg(getFPReg(MI->getOperand(1)));
|
pushReg(getFPReg(MI->getOperand(1)));
|
||||||
|
|
|
@ -486,10 +486,15 @@ void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
|
||||||
if (SrcIsSSE && DstIsSSE)
|
if (SrcIsSSE && DstIsSSE)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// If this is an FPStack extension (but not a truncation), it is a noop.
|
if (!SrcIsSSE && !DstIsSSE) {
|
||||||
if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
|
// If this is an FPStack extension, it is a noop.
|
||||||
continue;
|
if (N->getOpcode() == ISD::FP_EXTEND)
|
||||||
|
continue;
|
||||||
|
// If this is a value-preserving FPStack truncation, it is a noop.
|
||||||
|
if (N->getConstantOperandVal(1))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
|
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
|
||||||
// FPStack has extload and truncstore. SSE can fold direct loads into other
|
// FPStack has extload and truncstore. SSE can fold direct loads into other
|
||||||
// operations. Based on this, decide what we want to do.
|
// operations. Based on this, decide what we want to do.
|
||||||
|
@ -1150,7 +1155,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
|
||||||
case X86ISD::GlobalBaseReg:
|
case X86ISD::GlobalBaseReg:
|
||||||
return getGlobalBaseReg();
|
return getGlobalBaseReg();
|
||||||
|
|
||||||
case X86ISD::FP_GET_RESULT2: {
|
case X86ISD::FP_GET_ST0_ST1: {
|
||||||
SDOperand Chain = N.getOperand(0);
|
SDOperand Chain = N.getOperand(0);
|
||||||
SDOperand InFlag = N.getOperand(1);
|
SDOperand InFlag = N.getOperand(1);
|
||||||
AddToISelQueue(Chain);
|
AddToISelQueue(Chain);
|
||||||
|
@ -1161,7 +1166,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
|
||||||
Tys.push_back(MVT::Other);
|
Tys.push_back(MVT::Other);
|
||||||
Tys.push_back(MVT::Flag);
|
Tys.push_back(MVT::Flag);
|
||||||
SDOperand Ops[] = { Chain, InFlag };
|
SDOperand Ops[] = { Chain, InFlag };
|
||||||
SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys,
|
SDNode *ResNode = CurDAG->getTargetNode(X86::FpGET_ST0_ST1, Tys,
|
||||||
Ops, 2);
|
Ops, 2);
|
||||||
Chain = SDOperand(ResNode, 2);
|
Chain = SDOperand(ResNode, 2);
|
||||||
InFlag = SDOperand(ResNode, 3);
|
InFlag = SDOperand(ResNode, 3);
|
||||||
|
|
|
@ -917,9 +917,8 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
|
||||||
if (isScalarFPTypeInSSEReg(GetResultTy))
|
if (isScalarFPTypeInSSEReg(GetResultTy))
|
||||||
GetResultTy = MVT::f80;
|
GetResultTy = MVT::f80;
|
||||||
SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
|
SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
|
||||||
|
|
||||||
SDOperand GROps[] = { Chain, InFlag };
|
SDOperand GROps[] = { Chain, InFlag };
|
||||||
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
|
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0, Tys, GROps, 2);
|
||||||
Chain = RetVal.getValue(1);
|
Chain = RetVal.getValue(1);
|
||||||
InFlag = RetVal.getValue(2);
|
InFlag = RetVal.getValue(2);
|
||||||
|
|
||||||
|
@ -969,7 +968,7 @@ LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
|
||||||
const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
|
const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
|
||||||
SDVTList Tys = DAG.getVTList(VTs, 4);
|
SDVTList Tys = DAG.getVTList(VTs, 4);
|
||||||
SDOperand Ops[] = { Chain, InFlag };
|
SDOperand Ops[] = { Chain, InFlag };
|
||||||
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
|
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0_ST1, Tys, Ops, 2);
|
||||||
Chain = RetVal.getValue(2);
|
Chain = RetVal.getValue(2);
|
||||||
SDOperand FIN = TheCall->getOperand(5);
|
SDOperand FIN = TheCall->getOperand(5);
|
||||||
Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
|
Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
|
||||||
|
@ -5564,8 +5563,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||||
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
|
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
|
||||||
case X86ISD::FLD: return "X86ISD::FLD";
|
case X86ISD::FLD: return "X86ISD::FLD";
|
||||||
case X86ISD::FST: return "X86ISD::FST";
|
case X86ISD::FST: return "X86ISD::FST";
|
||||||
case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
|
case X86ISD::FP_GET_ST0: return "X86ISD::FP_GET_ST0";
|
||||||
case X86ISD::FP_GET_RESULT2: return "X86ISD::FP_GET_RESULT2";
|
case X86ISD::FP_GET_ST0_ST1: return "X86ISD::FP_GET_ST0_ST1";
|
||||||
case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
|
case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
|
||||||
case X86ISD::CALL: return "X86ISD::CALL";
|
case X86ISD::CALL: return "X86ISD::CALL";
|
||||||
case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
|
case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
|
||||||
|
|
|
@ -84,14 +84,14 @@ namespace llvm {
|
||||||
/// as.
|
/// as.
|
||||||
FST,
|
FST,
|
||||||
|
|
||||||
/// FP_GET_RESULT - This corresponds to FpGETRESULT pseudo instruction
|
/// FP_GET_ST0 - This corresponds to FpGET_ST0 pseudo instruction
|
||||||
/// which copies from ST(0) to the destination. It takes a chain and
|
/// which copies from ST(0) to the destination. It takes a chain and
|
||||||
/// writes a RFP result and a chain.
|
/// writes a RFP result and a chain.
|
||||||
FP_GET_RESULT,
|
FP_GET_ST0,
|
||||||
|
|
||||||
/// FP_GET_RESULT2 - Same as FP_GET_RESULT except it copies two values
|
/// FP_GET_ST0_ST1 - Same as FP_GET_ST0 except it copies two values
|
||||||
/// ST(0) and ST(1).
|
/// ST(0) and ST(1).
|
||||||
FP_GET_RESULT2,
|
FP_GET_ST0_ST1,
|
||||||
|
|
||||||
/// FP_SET_RESULT - This corresponds to FpSETRESULT pseudo instruction
|
/// FP_SET_RESULT - This corresponds to FpSETRESULT pseudo instruction
|
||||||
/// which copies the source operand to ST(0). It takes a chain+value and
|
/// which copies the source operand to ST(0). It takes a chain+value and
|
||||||
|
|
|
@ -18,7 +18,8 @@
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
|
def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
|
||||||
def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
|
def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
|
||||||
|
SDTCisVT<1, f80>]>;
|
||||||
def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
|
def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
|
||||||
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
|
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
|
||||||
SDTCisPtrTy<1>,
|
SDTCisPtrTy<1>,
|
||||||
|
@ -32,9 +33,7 @@ def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
|
||||||
|
|
||||||
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
|
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
|
||||||
|
|
||||||
def X86fpget : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet,
|
def X86fpget_st0 : SDNode<"X86ISD::FP_GET_ST0", SDTX86FpGet,
|
||||||
[SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
|
|
||||||
def X86fpget2 : SDNode<"X86ISD::FP_GET_RESULT2", SDTX86FpGet2,
|
|
||||||
[SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
|
[SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
|
||||||
def X86fpset : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet,
|
def X86fpset : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet,
|
||||||
[SDNPHasChain, SDNPOutFlag]>;
|
[SDNPHasChain, SDNPOutFlag]>;
|
||||||
|
@ -139,17 +138,15 @@ let isTerminator = 1 in
|
||||||
// encoding and asm printing info).
|
// encoding and asm printing info).
|
||||||
|
|
||||||
// Pseudo Instructions for FP stack return values.
|
// Pseudo Instructions for FP stack return values.
|
||||||
def FpGETRESULT32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP,
|
def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP,
|
||||||
[(set RFP32:$dst, X86fpget)]>; // FPR = ST(0)
|
[(set RFP32:$dst, X86fpget_st0)]>; // FPR = ST(0)
|
||||||
|
def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP,
|
||||||
|
[(set RFP64:$dst, X86fpget_st0)]>; // FPR = ST(0)
|
||||||
|
def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP,
|
||||||
|
[(set RFP80:$dst, X86fpget_st0)]>; // FPR = ST(0)
|
||||||
|
|
||||||
def FpGETRESULT64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP,
|
def FpGET_ST0_ST1 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP,
|
||||||
[(set RFP64:$dst, X86fpget)]>; // FPR = ST(0)
|
[]>; // FPR = ST(0), FPR = ST(1)
|
||||||
|
|
||||||
def FpGETRESULT80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP,
|
|
||||||
[(set RFP80:$dst, X86fpget)]>; // FPR = ST(0)
|
|
||||||
|
|
||||||
def FpGETRESULT80x2 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP,
|
|
||||||
[]>; // FPR = ST(0), FPR = ST(1)
|
|
||||||
|
|
||||||
|
|
||||||
let Defs = [ST0] in {
|
let Defs = [ST0] in {
|
||||||
|
@ -174,15 +171,15 @@ class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
|
||||||
|
|
||||||
// Register copies. Just copies, the shortening ones do not truncate.
|
// Register copies. Just copies, the shortening ones do not truncate.
|
||||||
let neverHasSideEffects = 1 in {
|
let neverHasSideEffects = 1 in {
|
||||||
def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
|
def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
|
||||||
def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
|
def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
|
||||||
def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
|
def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
|
||||||
def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
|
def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
|
||||||
def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
|
def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
|
||||||
def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
|
def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
|
||||||
def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
|
def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
|
||||||
def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
|
def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
|
||||||
def MOV_Fp8080 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
|
def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Factoring for arithmetic.
|
// Factoring for arithmetic.
|
||||||
|
@ -583,6 +580,21 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
|
||||||
// Used to conv. i64 to f64 since there isn't a SSE version.
|
// Used to conv. i64 to f64 since there isn't a SSE version.
|
||||||
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
|
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
|
||||||
|
|
||||||
def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStackf32]>;
|
// FP extensions map onto simple pseudo-value conversions if they are to/from
|
||||||
def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStackf32]>;
|
// the FP stack.
|
||||||
def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStackf64]>;
|
def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>,
|
||||||
|
Requires<[FPStackf32]>;
|
||||||
|
def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>,
|
||||||
|
Requires<[FPStackf32]>;
|
||||||
|
def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>,
|
||||||
|
Requires<[FPStackf64]>;
|
||||||
|
|
||||||
|
// FP truncations map onto simple pseudo-value conversions if they are to/from
|
||||||
|
// the FP stack. We have validated that only value-preserving truncations make
|
||||||
|
// it through isel.
|
||||||
|
def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
|
||||||
|
Requires<[FPStackf32]>;
|
||||||
|
def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
|
||||||
|
Requires<[FPStackf32]>;
|
||||||
|
def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
|
||||||
|
Requires<[FPStackf64]>;
|
||||||
|
|
Loading…
Reference in New Issue