AArch64: Use a callee saved register for swiftself parameters

It is very likely that the swiftself parameter is live throughout most of
the function, so putting it into a callee saved register should avoid
spills in the callers at the cost of only a minimal number of extra spills
in the callees.
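
As an illustrative sketch of the effect (mirroring the swiftself_passthrough
test added below; the declaration and function names here are made up), a
function that merely forwards its swiftself context can now leave it in x20
across calls instead of copying or spilling it around every call site:

; Hypothetical example: with swiftself in the callee saved x20, the value is
; assumed to survive each call, so no mov/spill of %ctx is needed between the
; two calls. With a caller saved register such as x9 it would have to be
; preserved manually around every call.
declare void @takes_ctx(i8* swiftself)

define void @forwards_ctx(i8* swiftself %ctx) {
  call void @takes_ctx(i8* swiftself %ctx)
  call void @takes_ctx(i8* swiftself %ctx)
  ret void
}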

Currently the generated code is correct but unnecessarily spills and
reloads arguments passed in callee saved registers; I will address this
in upcoming patches.

This also adds a missing check for tail calls: a parameter passed in a
callee saved register must hold the same value in the caller and in the
callee.
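
Roughly, the new check behaves as sketched below (this mirrors the
swiftself_tail and swiftself_notail tests added in this commit; the function
names are illustrative only):

declare i8* @callee(i8* swiftself)

; The caller forwards its own swiftself value, so x20 already holds the right
; value for the callee and the call can be lowered as a tail call.
define i8* @tail_ok(i8* swiftself %self) {
  %r = tail call i8* @callee(i8* swiftself %self)
  ret i8* %r
}

; A different value is passed as swiftself; honoring the tail call would
; clobber the caller's x20, so the check rejects it and a normal call is
; emitted instead.
define i8* @no_tail(i8* swiftself %self, i8* %other) {
  %r = tail call i8* @callee(i8* swiftself %other)
  ret i8* %r
}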

Differential Revision: http://reviews.llvm.org/D19007

llvm-svn: 266251
Matthias Braun 2016-04-13 21:43:16 +00:00
parent b10e893273
commit 74a0bd319a
4 changed files with 104 additions and 37 deletions

lib/Target/AArch64/AArch64CallingConvention.td

@@ -45,6 +45,9 @@ def CC_AArch64_AAPCS : CallingConv<[
// supported there.
CCIfNest<CCAssignToReg<[X18]>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
@@ -128,8 +131,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
// A SwiftSelf is passed in X9.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X9], [W9]>>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
// A SwiftError is passed in X19.
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,

lib/Target/AArch64/AArch64FrameLowering.cpp

@@ -697,13 +697,13 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
if (Reg != AArch64::LR)
return getKillRegState(true);
// LR maybe referred to later by an @llvm.returnaddress intrinsic.
bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
return getKillRegState(LRKill);
// Do not set a kill flag on values that are also marked as live-in. This
// happens with the @llvm.returnaddress intrinsic and with arguments passed in
// callee saved registers.
// Omitting the kill flags is conservatively correct even if the live-in
// is not used after all.
bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
return getKillRegState(!IsLiveIn);
}
static bool produceCompactUnwindFrame(MachineFunction &MF) {

lib/Target/AArch64/AArch64ISelLowering.cpp

@@ -2875,10 +2875,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (!CCMatch) {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
TRI->getCallPreservedMask(MF, CalleeCC)))
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
@@ -2893,9 +2894,34 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// If the stack arguments for this call would fit into our own save area then
// the call can be made tail.
return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
// Parameters passed in callee saved registers must have the same value in
// caller and callee.
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
unsigned Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreserved, Reg))
continue;
// Check that we pass the value used for the caller.
// (We look for a CopyFromReg reading a virtual register that is used
// for the function live-in value of register Reg)
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
return true;
}
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,

test/CodeGen/AArch64/swiftself.ll

@@ -1,29 +1,67 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-APPLE %s
; RUN: llc -O0 -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-O0 %s
; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
; Parameter with swiftself should be allocated to x9.
define void @check_swiftself(i32* swiftself %addr0) {
; CHECK-APPLE-LABEL: check_swiftself:
; CHECK-O0-LABEL: check_swiftself:
; Parameter with swiftself should be allocated to x20.
; CHECK-LABEL: swiftself_param:
; CHECK: mov x0, x20
; CHECK-NEXT: ret
define i8* @swiftself_param(i8* swiftself %addr0) {
ret i8 *%addr0
}
%val0 = load volatile i32, i32* %addr0
; CHECK-APPLE: ldr w{{.*}}, [x9]
; CHECK-O0: ldr w{{.*}}, [x9]
; Check that x20 is used to pass a swiftself argument.
; CHECK-LABEL: call_swiftself:
; CHECK: mov x20, x0
; CHECK: bl {{_?}}swiftself_param
; CHECK: ret
define i8 *@call_swiftself(i8* %arg) {
%res = call i8 *@swiftself_param(i8* swiftself %arg)
ret i8 *%res
}
; x20 should be saved by the callee even if used for swiftself
; CHECK-LABEL: swiftself_clobber:
; CHECK: {{stp|str}} {{.*}}x20{{.*}}sp
; ...
; CHECK: {{ldp|ldr}} {{.*}}x20{{.*}}sp
; CHECK: ret
define i8 *@swiftself_clobber(i8* swiftself %addr0) {
call void asm sideeffect "", "~{x20}"()
ret i8 *%addr0
}
; Demonstrate that we do not need any movs when calling multiple functions
; with swiftself argument.
; CHECK-LABEL: swiftself_passthrough:
; OPT-NOT: mov{{.*}}x20
; OPT: bl {{_?}}swiftself_param
; OPT-NOT: mov{{.*}}x20
; OPT-NEXT: bl {{_?}}swiftself_param
; OPT: ret
define void @swiftself_passthrough(i8* swiftself %addr0) {
call i8 *@swiftself_param(i8* swiftself %addr0)
call i8 *@swiftself_param(i8* swiftself %addr0)
ret void
}
@var8_3 = global i8 0
declare void @take_swiftself(i8* swiftself %addr0)
define void @simple_args() {
; CHECK-APPLE-LABEL: simple_args:
; CHECK-O0-LABEL: simple_args:
call void @take_swiftself(i8* @var8_3)
; CHECK-APPLE: add x9,
; CHECK-APPLE: bl {{_?}}take_swiftself
; CHECK-O0: add x9,
; CHECK-O0: bl {{_?}}take_swiftself
ret void
; We can use a tail call if the callee swiftself is the same as the caller one.
; CHECK-LABEL: swiftself_tail:
; OPT: b {{_?}}swiftself_param
; OPT-NOT: ret
define i8* @swiftself_tail(i8* swiftself %addr0) {
call void asm sideeffect "", "~{x20}"()
%res = tail call i8* @swiftself_param(i8* swiftself %addr0)
ret i8* %res
}
; We can not use a tail call if the callee swiftself is not the same as the
; caller one.
; CHECK-LABEL: swiftself_notail:
; CHECK: mov x20, x0
; CHECK: bl {{_?}}swiftself_param
; CHECK: ret
define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind {
%res = tail call i8* @swiftself_param(i8* swiftself %addr1)
ret i8* %res
}