[AArch64] Implement support for Windows-style vararg functions
Pass parameters properly in calls to such functions (pass all floats in integer registers), and handle va_start properly (allocate stack space immediately below the arguments on the stack, so that the register arguments are saved into a single contiguous array). Differential Revision: https://reviews.llvm.org/D35006 llvm-svn: 307928
This commit is contained in:
parent
8c0317da02
commit
68266faa31
|
@ -118,6 +118,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[
|
|||
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
|
||||
]>;
|
||||
|
||||
// Calling convention used for variadic functions on Windows: floating-point
// arguments are passed in integer registers.
// The rules below are tried in order for each argument.
// NOTE(review): the f16/f32 promotion also applies to fixed (named) float
// parameters of a variadic function - confirm this matches the Windows ABI.
|
||||
def CC_AArch64_Win64_VarArg : CallingConv<[
|
||||
// Widen f16/f32 to f64 so that every float takes the f64 rule below.
CCIfType<[f16, f32], CCPromoteToType<f64>>,
|
||||
// Bit-convert f64 to i64.
CCIfType<[f64], CCBitConvertToType<i64>>,
|
||||
// Hand the (possibly converted) argument to the standard AAPCS convention.
CCDelegateTo<CC_AArch64_AAPCS>
|
||||
]>;
|
||||
|
||||
|
||||
// Darwin uses a calling convention which differs in only two ways
|
||||
// from the standard one at this level:
|
||||
|
|
|
@ -5138,6 +5138,7 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
|
|||
return selectOperator(I, I->getOpcode());
|
||||
// Silence warnings.
|
||||
(void)&CC_AArch64_DarwinPCS_VarArg;
|
||||
(void)&CC_AArch64_Win64_VarArg;
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
|
|
@ -41,6 +41,10 @@
|
|||
// | |
|
||||
// |-----------------------------------|
|
||||
// | |
|
||||
// | (Win64 only) varargs from reg |
|
||||
// | |
|
||||
// |-----------------------------------|
|
||||
// | |
|
||||
// | prev_fp, prev_lr |
|
||||
// | (a.k.a. "frame record") |
|
||||
// |-----------------------------------| <- fp(=x29)
|
||||
|
@ -950,7 +954,12 @@ static void computeCalleeSaveRegisterPairs(
|
|||
CC == CallingConv::PreserveMost ||
|
||||
(Count & 1) == 0) &&
|
||||
"Odd number of callee-saved regs to spill!");
|
||||
unsigned Offset = AFI->getCalleeSavedStackSize();
|
||||
int Offset = AFI->getCalleeSavedStackSize();
|
||||
|
||||
unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
|
||||
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
||||
if (Subtarget.isTargetWindows())
|
||||
Offset -= alignTo(GPRSaveSize, 16);
|
||||
|
||||
for (unsigned i = 0; i < Count; ++i) {
|
||||
RegPairInfo RPI;
|
||||
|
|
|
@ -2650,6 +2650,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
|
|||
case CallingConv::PreserveMost:
|
||||
case CallingConv::CXX_FAST_TLS:
|
||||
case CallingConv::Swift:
|
||||
if (Subtarget->isTargetWindows() && IsVarArg)
|
||||
return CC_AArch64_Win64_VarArg;
|
||||
if (!Subtarget->isTargetDarwin())
|
||||
return CC_AArch64_AAPCS;
|
||||
return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
|
||||
|
@ -2828,6 +2830,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
|
|||
// The AAPCS variadic function ABI is identical to the non-variadic
|
||||
// one. As a result there may be more arguments in registers and we should
|
||||
// save them for future reference.
|
||||
// Win64 variadic functions also pass arguments in registers, but all float
|
||||
// arguments are passed in integer registers.
|
||||
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
|
||||
}
|
||||
|
||||
|
@ -2881,7 +2885,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
|
|||
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
|
||||
int GPRIdx = 0;
|
||||
if (GPRSaveSize != 0) {
|
||||
GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
|
||||
if (Subtarget->isTargetWindows())
|
||||
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
|
||||
else
|
||||
GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
|
||||
|
||||
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
|
||||
|
||||
|
@ -2890,7 +2897,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
|
|||
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
|
||||
SDValue Store = DAG.getStore(
|
||||
Val.getValue(1), DL, Val, FIN,
|
||||
MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
|
||||
Subtarget->isTargetWindows()
|
||||
? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
|
||||
GPRIdx,
|
||||
(i - FirstVariadicGPR) * 8)
|
||||
: MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
|
||||
MemOps.push_back(Store);
|
||||
FIN =
|
||||
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
|
||||
|
@ -2899,7 +2910,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
|
|||
FuncInfo->setVarArgsGPRIndex(GPRIdx);
|
||||
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
|
||||
|
||||
if (Subtarget->hasFPARMv8()) {
|
||||
if (Subtarget->hasFPARMv8() && !Subtarget->isTargetWindows()) {
|
||||
static const MCPhysReg FPRArgRegs[] = {
|
||||
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
|
||||
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
|
||||
|
@ -4491,6 +4502,21 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
|
|||
MachinePointerInfo(SV));
|
||||
}
|
||||
|
||||
// Lowers a VASTART node for Win64 targets.
//
// Emits a single store of a frame-index address to the location given by
// operand 1 of Op, chained on operand 0. The stored frame index is the GPR
// save area when the function recorded a non-zero GPR save size, and the
// stack varargs object otherwise.
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
AArch64FunctionInfo *FuncInfo =
|
||||
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
|
||||
|
||||
SDLoc DL(Op);
|
||||
// Pick the GPR save area if any variadic GPRs were saved, else the stack
// varargs object.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
|
||||
? FuncInfo->getVarArgsGPRIndex()
|
||||
: FuncInfo->getVarArgsStackIndex(),
|
||||
getPointerTy(DAG.getDataLayout()));
|
||||
// Operand 2 carries the source Value used for the store's pointer info.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
|
||||
// Store the chosen address through the va_list pointer (operand 1).
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
|
||||
MachinePointerInfo(SV));
|
||||
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
// The layout of the va_list struct is specified in the AArch64 Procedure Call
|
||||
|
@ -4562,8 +4588,12 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
|
|||
|
||||
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG)
|
||||
: LowerAAPCS_VASTART(Op, DAG);
|
||||
if (Subtarget->isTargetWindows())
|
||||
return LowerWin64_VASTART(Op, DAG);
|
||||
else if (Subtarget->isTargetDarwin())
|
||||
return LowerDarwin_VASTART(Op, DAG);
|
||||
else
|
||||
return LowerAAPCS_VASTART(Op, DAG);
|
||||
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
|
||||
|
@ -4571,7 +4601,8 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
|
|||
// AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
|
||||
// pointer.
|
||||
SDLoc DL(Op);
|
||||
unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
|
||||
unsigned VaListSize =
|
||||
Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
|
||||
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
|
||||
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
|
||||
|
||||
|
@ -10780,7 +10811,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
|
|||
|
||||
unsigned
|
||||
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
|
||||
if (Subtarget->isTargetDarwin())
|
||||
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
|
||||
return getPointerTy(DL).getSizeInBits();
|
||||
|
||||
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
|
||||
|
|
|
@ -541,6 +541,7 @@ private:
|
|||
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
; RUN: llc < %s -mtriple=aarch64-pc-win32 | FileCheck %s
|
||||
|
||||
; pass_va: one fixed i32 argument followed by varargs. The checks expect x1-x7
; to be stored at sp+24 .. sp+79 and the va_list pointer (sp+24, the address of
; the saved x1) to be both stored in the frame and passed to other_func in x0.
define void @pass_va(i32 %count, ...) nounwind {
|
||||
entry:
|
||||
; Anchor the following checks to this function's output, as the other
; functions in this file do.
; CHECK-LABEL: pass_va:
|
||||
; CHECK: sub sp, sp, #80
|
||||
; CHECK: add x8, sp, #24
|
||||
; CHECK: add x0, sp, #24
|
||||
; CHECK: stp x6, x7, [sp, #64]
|
||||
; CHECK: stp x4, x5, [sp, #48]
|
||||
; CHECK: stp x2, x3, [sp, #32]
|
||||
; CHECK: str x1, [sp, #24]
|
||||
; CHECK: stp x30, x8, [sp]
|
||||
; CHECK: bl other_func
|
||||
; CHECK: ldr x30, [sp], #80
|
||||
; CHECK: ret
|
||||
%ap = alloca i8*, align 8
|
||||
%ap1 = bitcast i8** %ap to i8*
|
||||
call void @llvm.va_start(i8* %ap1)
|
||||
%ap2 = load i8*, i8** %ap, align 8
|
||||
call void @other_func(i8* %ap2)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @other_func(i8*) local_unnamed_addr
|
||||
|
||||
declare void @llvm.va_start(i8*) nounwind
|
||||
declare void @llvm.va_copy(i8*, i8*) nounwind
|
||||
|
||||
; f9: nine fixed i64 arguments followed by varargs; returns the va_list
; pointer written by va_start. The expected pointer is sp+24, i.e. 8 bytes
; above the top of the 16-byte frame (presumably skipping the stack slot of
; %a8). The checks contain no argument-register stores.
; CHECK-LABEL: f9:
|
||||
; CHECK: sub sp, sp, #16
|
||||
; CHECK: add x8, sp, #24
|
||||
; CHECK: add x0, sp, #24
|
||||
; CHECK: str x8, [sp, #8]
|
||||
; CHECK: add sp, sp, #16
|
||||
; CHECK: ret
|
||||
define i8* @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, ...) nounwind {
|
||||
entry:
|
||||
%ap = alloca i8*, align 8
|
||||
%ap1 = bitcast i8** %ap to i8*
|
||||
call void @llvm.va_start(i8* %ap1)
|
||||
%ap2 = load i8*, i8** %ap, align 8
|
||||
ret i8* %ap2
|
||||
}
|
||||
|
||||
; f8: eight fixed i64 arguments followed by varargs; returns the va_list
; pointer written by va_start. The expected pointer is sp+16, the address
; directly above the 16-byte frame. The checks contain no argument-register
; stores.
; CHECK-LABEL: f8:
|
||||
; CHECK: sub sp, sp, #16
|
||||
; CHECK: add x8, sp, #16
|
||||
; CHECK: add x0, sp, #16
|
||||
; CHECK: str x8, [sp, #8]
|
||||
; CHECK: add sp, sp, #16
|
||||
; CHECK: ret
|
||||
define i8* @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) nounwind {
|
||||
entry:
|
||||
%ap = alloca i8*, align 8
|
||||
%ap1 = bitcast i8** %ap to i8*
|
||||
call void @llvm.va_start(i8* %ap1)
|
||||
%ap2 = load i8*, i8** %ap, align 8
|
||||
ret i8* %ap2
|
||||
}
|
||||
|
||||
; f7: seven fixed i64 arguments followed by varargs; returns the va_list
; pointer written by va_start. The checks expect x7 to be stored in the upper
; half of the 16-byte frame (sp+8) by the stp, and the va_list pointer to be
; sp+8, the address of that saved register.
; CHECK-LABEL: f7:
|
||||
; CHECK: sub sp, sp, #16
|
||||
; CHECK: add x8, sp, #8
|
||||
; CHECK: add x0, sp, #8
|
||||
; CHECK: stp x8, x7, [sp], #16
|
||||
; CHECK: ret
|
||||
define i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, ...) nounwind {
|
||||
entry:
|
||||
%ap = alloca i8*, align 8
|
||||
%ap1 = bitcast i8** %ap to i8*
|
||||
call void @llvm.va_start(i8* %ap1)
|
||||
%ap2 = load i8*, i8** %ap, align 8
|
||||
ret i8* %ap2
|
||||
}
|
||||
|
||||
; copy1: va_start followed by va_copy of the resulting va_list. The checks
; expect x1-x7 to be stored at sp+24 .. sp+79 and the same pointer (sp+24) to
; be stored to two frame slots, sp+16 and sp+8 (presumably %ap and %cp).
; CHECK-LABEL: copy1:
|
||||
; CHECK: sub sp, sp, #80
|
||||
; CHECK: add x8, sp, #24
|
||||
; CHECK: stp x6, x7, [sp, #64]
|
||||
; CHECK: stp x4, x5, [sp, #48]
|
||||
; CHECK: stp x2, x3, [sp, #32]
|
||||
; CHECK: stp x8, x1, [sp, #16]
|
||||
; CHECK: str x8, [sp, #8]
|
||||
; CHECK: add sp, sp, #80
|
||||
; CHECK: ret
|
||||
define void @copy1(i64 %a0, ...) nounwind {
|
||||
entry:
|
||||
%ap = alloca i8*, align 8
|
||||
%cp = alloca i8*, align 8
|
||||
%ap1 = bitcast i8** %ap to i8*
|
||||
%cp1 = bitcast i8** %cp to i8*
|
||||
call void @llvm.va_start(i8* %ap1)
|
||||
call void @llvm.va_copy(i8* %cp1, i8* %ap1)
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue