From 74a0bd319ad9a6bca95684d4c87851e4583725e9 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Wed, 13 Apr 2016 21:43:16 +0000 Subject: [PATCH] AArch64: Use a callee save register for swiftself parameters It is very likely that the swiftself parameter is alive throughout most functions, so putting it into a callee save register should avoid spills for the callers with only a minimal amount of extra spills in the callees. Currently the generated code is correct but unnecessarily spills and reloads arguments passed in callee save registers; I will address this in upcoming patches. This also adds a missing check that, for tail calls, the preserved value of the caller must be the same as the callee's parameter. Differential Revision: http://reviews.llvm.org/D19007 llvm-svn: 266251 --- .../AArch64/AArch64CallingConvention.td | 7 +- .../Target/AArch64/AArch64FrameLowering.cpp | 14 ++-- .../Target/AArch64/AArch64ISelLowering.cpp | 38 +++++++-- llvm/test/CodeGen/AArch64/swiftself.ll | 82 ++++++++++++++----- 4 files changed, 104 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index f6bd012d8c8b..178e3971640e 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -45,6 +45,9 @@ def CC_AArch64_AAPCS : CallingConv<[ // supported there. CCIfNest>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + CCIfConsecutiveRegs>, // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, @@ -128,8 +131,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // slot is 64-bit. CCIfByVal>, - // A SwiftSelf is passed in X9. - CCIfSwiftSelf>>, + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, // A SwiftError is passed in X19. 
CCIfSwiftError>>, diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 047cd577e008..57e320ab2673 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -697,13 +697,13 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { - if (Reg != AArch64::LR) - return getKillRegState(true); - - // LR maybe referred to later by an @llvm.returnaddress intrinsic. - bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR); - bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); - return getKillRegState(LRKill); + // Do not set a kill flag on values that are also marked as live-in. This + // happens with the @llvm-returnaddress intrinsic and with arguments passed in + // callee saved registers. + // Omitting the kill flags is conservatively correct even if the live-in + // is not used after all. + bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg); + return getKillRegState(!IsLiveIn); } static bool produceCompactUnwindFrame(MachineFunction &MF) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1c0bb181d8c8..7cd8ca99e617 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2875,10 +2875,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( CCAssignFnForCall(CallerCC, isVarArg))) return false; // The callee has to preserve all registers the caller needs to preserve. 
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (!CCMatch) { - const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); - if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), - TRI->getCallPreservedMask(MF, CalleeCC))) + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; } @@ -2893,9 +2894,34 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( const AArch64FunctionInfo *FuncInfo = MF.getInfo(); - // If the stack arguments for this call would fit into our own save area then - // the call can be made tail. - return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); + // If the stack arguments for this call do not fit into our own save area then + // the call cannot be made tail. + if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) + return false; + + // Parameters passed in callee saved registers must have the same value in + // caller and callee. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + const CCValAssign &ArgLoc = ArgLocs[I]; + if (!ArgLoc.isRegLoc()) + continue; + unsigned Reg = ArgLoc.getLocReg(); + // Only look at callee saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreserved, Reg)) + continue; + // Check that we pass the value used for the caller. 
+ // (We look for a CopyFromReg reading a virtual register that is used + // for the function live-in value of register Reg) + SDValue Value = OutVals[I]; + if (Value->getOpcode() != ISD::CopyFromReg) + return false; + unsigned ArgReg = cast(Value->getOperand(1))->getReg(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.getLiveInPhysReg(ArgReg) != Reg) + return false; + } + + return true; } SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, diff --git a/llvm/test/CodeGen/AArch64/swiftself.ll b/llvm/test/CodeGen/AArch64/swiftself.ll index f93f8f398225..a60aed6b0f2b 100644 --- a/llvm/test/CodeGen/AArch64/swiftself.ll +++ b/llvm/test/CodeGen/AArch64/swiftself.ll @@ -1,29 +1,67 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-APPLE %s -; RUN: llc -O0 -verify-machineinstrs < %s -mtriple=aarch64-apple-ios | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s +; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s -; Parameter with swiftself should be allocated to x9. -define void @check_swiftself(i32* swiftself %addr0) { -; CHECK-APPLE-LABEL: check_swiftself: -; CHECK-O0-LABEL: check_swiftself: - - %val0 = load volatile i32, i32* %addr0 -; CHECK-APPLE: ldr w{{.*}}, [x9] -; CHECK-O0: ldr w{{.*}}, [x9] - ret void +; Parameter with swiftself should be allocated to x20. +; CHECK-LABEL: swiftself_param: +; CHECK: mov x0, x20 +; CHECK-NEXT: ret +define i8* @swiftself_param(i8* swiftself %addr0) { + ret i8 *%addr0 } -@var8_3 = global i8 0 -declare void @take_swiftself(i8* swiftself %addr0) +; Check that x20 is used to pass a swiftself argument. 
+; CHECK-LABEL: call_swiftself: +; CHECK: mov x20, x0 +; CHECK: bl {{_?}}swiftself_param +; CHECK: ret +define i8 *@call_swiftself(i8* %arg) { + %res = call i8 *@swiftself_param(i8* swiftself %arg) + ret i8 *%res +} -define void @simple_args() { -; CHECK-APPLE-LABEL: simple_args: -; CHECK-O0-LABEL: simple_args: - - call void @take_swiftself(i8* @var8_3) -; CHECK-APPLE: add x9, -; CHECK-APPLE: bl {{_?}}take_swiftself -; CHECK-O0: add x9, -; CHECK-O0: bl {{_?}}take_swiftself +; x20 should be saved by the callee even if used for swiftself +; CHECK-LABEL: swiftself_clobber: +; CHECK: {{stp|str}} {{.*}}x20{{.*}}sp +; ... +; CHECK: {{ldp|ldr}} {{.*}}x20{{.*}}sp +; CHECK: ret +define i8 *@swiftself_clobber(i8* swiftself %addr0) { + call void asm sideeffect "", "~{x20}"() + ret i8 *%addr0 +} +; Demonstrate that we do not need any movs when calling multiple functions +; with swiftself argument. +; CHECK-LABEL: swiftself_passthrough: +; OPT-NOT: mov{{.*}}x20 +; OPT: bl {{_?}}swiftself_param +; OPT-NOT: mov{{.*}}x20 +; OPT-NEXT: bl {{_?}}swiftself_param +; OPT: ret +define void @swiftself_passthrough(i8* swiftself %addr0) { + call i8 *@swiftself_param(i8* swiftself %addr0) + call i8 *@swiftself_param(i8* swiftself %addr0) ret void } + +; We can use a tail call if the callee swiftself is the same as the caller one. +; CHECK-LABEL: swiftself_tail: +; OPT: b {{_?}}swiftself_param +; OPT-NOT: ret +define i8* @swiftself_tail(i8* swiftself %addr0) { + call void asm sideeffect "", "~{x20}"() + %res = tail call i8* @swiftself_param(i8* swiftself %addr0) + ret i8* %res +} + +; We can not use a tail call if the callee swiftself is not the same as the +; caller one. +; CHECK-LABEL: swiftself_notail: +; CHECK: mov x20, x0 +; CHECK: bl {{_?}}swiftself_param +; CHECK: ret +define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind { + %res = tail call i8* @swiftself_param(i8* swiftself %addr1) + ret i8* %res +}