On x86-64, for a varargs function, don't store the xmm registers to
the register save area if %al is 0. This avoids touching xmm registers when they aren't actually used. llvm-svn: 79061
This commit is contained in:
parent
08479ae7fe
commit
0700a56860
|
@ -1527,37 +1527,44 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
||||||
// Store the integer parameter registers.
|
// Store the integer parameter registers.
|
||||||
SmallVector<SDValue, 8> MemOps;
|
SmallVector<SDValue, 8> MemOps;
|
||||||
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
|
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
|
||||||
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
|
unsigned Offset = VarArgsGPOffset;
|
||||||
DAG.getIntPtrConstant(VarArgsGPOffset));
|
|
||||||
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
|
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
|
||||||
|
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
|
||||||
|
DAG.getIntPtrConstant(Offset));
|
||||||
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
|
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
|
||||||
X86::GR64RegisterClass);
|
X86::GR64RegisterClass);
|
||||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
|
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
|
||||||
SDValue Store =
|
SDValue Store =
|
||||||
DAG.getStore(Val.getValue(1), dl, Val, FIN,
|
DAG.getStore(Val.getValue(1), dl, Val, FIN,
|
||||||
PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
|
PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
|
||||||
|
Offset);
|
||||||
MemOps.push_back(Store);
|
MemOps.push_back(Store);
|
||||||
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
|
Offset += 8;
|
||||||
DAG.getIntPtrConstant(8));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!MemOps.empty())
|
||||||
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||||
|
&MemOps[0], MemOps.size());
|
||||||
|
|
||||||
// Now store the XMM (fp + vector) parameter registers.
|
// Now store the XMM (fp + vector) parameter registers.
|
||||||
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
|
SmallVector<SDValue, 11> SaveXMMOps;
|
||||||
DAG.getIntPtrConstant(VarArgsFPOffset));
|
SaveXMMOps.push_back(Chain);
|
||||||
|
|
||||||
|
unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
|
||||||
|
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
|
||||||
|
SaveXMMOps.push_back(ALVal);
|
||||||
|
|
||||||
|
SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
|
||||||
|
SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
|
||||||
|
|
||||||
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
|
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
|
||||||
unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
|
unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
|
||||||
X86::VR128RegisterClass);
|
X86::VR128RegisterClass);
|
||||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
|
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
|
||||||
SDValue Store =
|
SaveXMMOps.push_back(Val);
|
||||||
DAG.getStore(Val.getValue(1), dl, Val, FIN,
|
|
||||||
PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
|
|
||||||
MemOps.push_back(Store);
|
|
||||||
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
|
|
||||||
DAG.getIntPtrConstant(16));
|
|
||||||
}
|
}
|
||||||
if (!MemOps.empty())
|
Chain = DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, MVT::Other,
|
||||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
&SaveXMMOps[0], SaveXMMOps.size());
|
||||||
&MemOps[0], MemOps.size());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7090,6 +7097,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||||
case X86ISD::DEC: return "X86ISD::DEC";
|
case X86ISD::DEC: return "X86ISD::DEC";
|
||||||
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
|
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
|
||||||
case X86ISD::PTEST: return "X86ISD::PTEST";
|
case X86ISD::PTEST: return "X86ISD::PTEST";
|
||||||
|
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7513,7 +7521,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
|
||||||
F->insert(MBBIter, newMBB);
|
F->insert(MBBIter, newMBB);
|
||||||
F->insert(MBBIter, nextMBB);
|
F->insert(MBBIter, nextMBB);
|
||||||
|
|
||||||
// Move all successors to thisMBB to nextMBB
|
// Move all successors of thisMBB to nextMBB
|
||||||
nextMBB->transferSuccessors(thisMBB);
|
nextMBB->transferSuccessors(thisMBB);
|
||||||
|
|
||||||
// Update thisMBB to fall through to newMBB
|
// Update thisMBB to fall through to newMBB
|
||||||
|
@ -7585,6 +7593,73 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
|
||||||
return nextMBB;
|
return nextMBB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MachineBasicBlock *
|
||||||
|
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
|
||||||
|
MachineInstr *MI,
|
||||||
|
MachineBasicBlock *MBB) const {
|
||||||
|
// Emit code to save XMM registers to the stack. The ABI says that the
|
||||||
|
// number of registers to save is given in %al, so it's theoretically
|
||||||
|
// possible to do an indirect jump trick to avoid saving all of them,
|
||||||
|
// however this code takes a simpler approach and just executes all
|
||||||
|
// of the stores if %al is non-zero. It's less code, and it's probably
|
||||||
|
// easier on the hardware branch predictor, and stores aren't all that
|
||||||
|
// expensive anyway.
|
||||||
|
|
||||||
|
// Create the new basic blocks. One block contains all the XMM stores,
|
||||||
|
// and one block is the final destination regardless of whether any
|
||||||
|
// stores were performed.
|
||||||
|
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
|
||||||
|
MachineFunction *F = MBB->getParent();
|
||||||
|
MachineFunction::iterator MBBIter = MBB;
|
||||||
|
++MBBIter;
|
||||||
|
MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||||
|
MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||||
|
F->insert(MBBIter, XMMSaveMBB);
|
||||||
|
F->insert(MBBIter, EndMBB);
|
||||||
|
|
||||||
|
// Set up the CFG.
|
||||||
|
// Move any original successors of MBB to the end block.
|
||||||
|
EndMBB->transferSuccessors(MBB);
|
||||||
|
// The original block will now fall through to the XMM save block.
|
||||||
|
MBB->addSuccessor(XMMSaveMBB);
|
||||||
|
// The XMMSaveMBB will fall through to the end block.
|
||||||
|
XMMSaveMBB->addSuccessor(EndMBB);
|
||||||
|
|
||||||
|
// Now add the instructions.
|
||||||
|
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
||||||
|
DebugLoc DL = MI->getDebugLoc();
|
||||||
|
|
||||||
|
unsigned CountReg = MI->getOperand(0).getReg();
|
||||||
|
int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
|
||||||
|
int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
|
||||||
|
|
||||||
|
if (!Subtarget->isTargetWin64()) {
|
||||||
|
// If %al is 0, branch around the XMM save block.
|
||||||
|
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
|
||||||
|
BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB);
|
||||||
|
MBB->addSuccessor(EndMBB);
|
||||||
|
}
|
||||||
|
|
||||||
|
// In the XMM save block, save all the XMM argument registers.
|
||||||
|
for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
|
||||||
|
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
|
||||||
|
BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
|
||||||
|
.addFrameIndex(RegSaveFrameIndex)
|
||||||
|
.addImm(/*Scale=*/1)
|
||||||
|
.addReg(/*IndexReg=*/0)
|
||||||
|
.addImm(/*Disp=*/Offset)
|
||||||
|
.addReg(/*Segment=*/0)
|
||||||
|
.addReg(MI->getOperand(i).getReg())
|
||||||
|
.addMemOperand(MachineMemOperand(
|
||||||
|
PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
|
||||||
|
MachineMemOperand::MOStore, Offset,
|
||||||
|
/*Size=*/16, /*Align=*/16));
|
||||||
|
}
|
||||||
|
|
||||||
|
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
|
||||||
|
|
||||||
|
return EndMBB;
|
||||||
|
}
|
||||||
|
|
||||||
MachineBasicBlock *
|
MachineBasicBlock *
|
||||||
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||||
|
@ -7888,6 +7963,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||||
X86::MOV32rr, X86::MOV32rr,
|
X86::MOV32rr, X86::MOV32rr,
|
||||||
X86::MOV32ri, X86::MOV32ri,
|
X86::MOV32ri, X86::MOV32ri,
|
||||||
false);
|
false);
|
||||||
|
case X86::VASTART_SAVE_XMM_REGS:
|
||||||
|
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -243,7 +243,12 @@ namespace llvm {
|
||||||
MUL_IMM,
|
MUL_IMM,
|
||||||
|
|
||||||
// PTEST - Vector bitwise comparisons
|
// PTEST - Vector bitwise comparisons
|
||||||
PTEST
|
PTEST,
|
||||||
|
|
||||||
|
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
|
||||||
|
// according to %al. An operator is needed so that this can be expanded
|
||||||
|
// with control flow.
|
||||||
|
VASTART_SAVE_XMM_REGS
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -715,6 +720,11 @@ namespace llvm {
|
||||||
MachineBasicBlock *BB,
|
MachineBasicBlock *BB,
|
||||||
unsigned cmovOpc) const;
|
unsigned cmovOpc) const;
|
||||||
|
|
||||||
|
/// Utility function to emit the xmm reg save portion of va_start.
|
||||||
|
MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
|
||||||
|
MachineInstr *BInstr,
|
||||||
|
MachineBasicBlock *BB) const;
|
||||||
|
|
||||||
/// Emit nodes that will be selected as "test Op0,Op0", or something
|
/// Emit nodes that will be selected as "test Op0,Op0", or something
|
||||||
/// equivalent, for use with the given x86 condition code.
|
/// equivalent, for use with the given x86 condition code.
|
||||||
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
|
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
|
||||||
|
|
|
@ -56,6 +56,10 @@ def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
|
||||||
|
|
||||||
def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
|
def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
|
||||||
|
|
||||||
|
def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
|
||||||
|
SDTCisVT<1, iPTR>,
|
||||||
|
SDTCisVT<2, iPTR>]>;
|
||||||
|
|
||||||
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
|
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
|
||||||
|
|
||||||
def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
|
def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
|
||||||
|
@ -114,6 +118,11 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
|
||||||
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
|
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
|
||||||
[SDNPHasChain, SDNPOptInFlag]>;
|
[SDNPHasChain, SDNPOptInFlag]>;
|
||||||
|
|
||||||
|
def X86vastart_save_xmm_regs :
|
||||||
|
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
|
||||||
|
SDT_X86VASTART_SAVE_XMM_REGS,
|
||||||
|
[SDNPHasChain]>;
|
||||||
|
|
||||||
def X86callseq_start :
|
def X86callseq_start :
|
||||||
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
|
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
|
||||||
[SDNPHasChain, SDNPOutFlag]>;
|
[SDNPHasChain, SDNPOutFlag]>;
|
||||||
|
@ -511,6 +520,18 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
||||||
Requires<[In32BitMode]>;
|
Requires<[In32BitMode]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// x86-64 va_start lowering magic.
|
||||||
|
let usesCustomDAGSchedInserter = 1 in
|
||||||
|
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
|
||||||
|
(outs),
|
||||||
|
(ins GR8:$al,
|
||||||
|
i64imm:$regsavefi, i64imm:$offset,
|
||||||
|
variable_ops),
|
||||||
|
"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
|
||||||
|
[(X86vastart_save_xmm_regs GR8:$al,
|
||||||
|
imm:$regsavefi,
|
||||||
|
imm:$offset)]>;
|
||||||
|
|
||||||
// Nop
|
// Nop
|
||||||
let neverHasSideEffects = 1 in {
|
let neverHasSideEffects = 1 in {
|
||||||
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
|
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
; RUN: llvm-as < %s | llc -march=x86-64 | grep {testb \[%\]al, \[%\]al}
|
||||||
|
|
||||||
|
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
|
||||||
|
|
||||||
|
define void @foo(i32 %x, ...) nounwind {
|
||||||
|
entry:
|
||||||
|
%ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
|
||||||
|
%ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
|
||||||
|
call void @llvm.va_start(i8* %ap12)
|
||||||
|
%ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
|
||||||
|
call void @bar(%struct.__va_list_tag* %ap3) nounwind
|
||||||
|
call void @llvm.va_end(i8* %ap12)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.va_start(i8*) nounwind
|
||||||
|
|
||||||
|
declare void @bar(%struct.__va_list_tag*)
|
||||||
|
|
||||||
|
declare void @llvm.va_end(i8*) nounwind
|
Loading…
Reference in New Issue