[AArch64][GlobalISel] Support sibling calls with outgoing arguments
This adds support for lowering sibling calls with outgoing arguments. e.g ``` define void @foo(i32 %a) ``` Support is ported from AArch64ISelLowering's `isEligibleForTailCallOptimization`. The only thing that is missing is a full port of `TargetLowering::parametersInCSRMatch`. So, if we're using swiftself, we'll never tail call. - Rename `analyzeCallResult` to `analyzeArgInfo`, since the function is now used for both outgoing and incoming arguments - Teach `OutgoingArgHandler` about tail calls. Tail calls use frame indices for stack arguments. - Teach `lowerFormalArguments` to set the bytes in the caller's stack argument area. This is used later to check if the tail call's parameters will fit on the caller's stack. - Add `areCalleeOutgoingArgsTailCallable` to perform the eligibility check on the callee's outgoing arguments. For testing: - Update call-translator-tail-call to verify that we can now tail call with outgoing arguments, use G_FRAME_INDEX for stack arguments, and respect the size of the caller's stack - Remove GISel-specific check lines from speculation-hardening.ll, since GISel now tail calls like the other selectors - Add a GISel test line to tailcall-string-rvo.ll since we can tail call in that test now - Add a GISel test line to tailcall_misched_graph.ll since we tail call there now. Add specific check lines for GISel, since the debug output from the machine-scheduler differs with GlobalISel. The dependency still holds, but the output comes out in a different order. Differential Revision: https://reviews.llvm.org/D67471 llvm-svn: 371780
This commit is contained in:
parent
36e04d14e9
commit
a42070a6aa
|
@ -211,9 +211,11 @@ protected:
|
||||||
SmallVectorImpl<ArgInfo> &Args,
|
SmallVectorImpl<ArgInfo> &Args,
|
||||||
ValueHandler &Handler) const;
|
ValueHandler &Handler) const;
|
||||||
|
|
||||||
/// Analyze the return values of a call, incorporating info about the passed
|
/// Analyze passed or returned values from a call, supplied in \p Args,
|
||||||
/// values into \p CCState.
|
/// incorporating info about the passed values into \p CCState.
|
||||||
bool analyzeCallResult(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
|
///
|
||||||
|
/// Used to check if arguments are suitable for tail call lowering.
|
||||||
|
bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
|
||||||
CCAssignFn &Fn) const;
|
CCAssignFn &Fn) const;
|
||||||
|
|
||||||
/// \returns True if the calling convention for a callee and its caller pass
|
/// \returns True if the calling convention for a callee and its caller pass
|
||||||
|
|
|
@ -378,7 +378,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CallLowering::analyzeCallResult(CCState &CCState,
|
bool CallLowering::analyzeArgInfo(CCState &CCState,
|
||||||
SmallVectorImpl<ArgInfo> &Args,
|
SmallVectorImpl<ArgInfo> &Args,
|
||||||
CCAssignFn &Fn) const {
|
CCAssignFn &Fn) const {
|
||||||
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
|
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
|
||||||
|
@ -407,12 +407,12 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
|
||||||
|
|
||||||
SmallVector<CCValAssign, 16> ArgLocs1;
|
SmallVector<CCValAssign, 16> ArgLocs1;
|
||||||
CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
|
CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
|
||||||
if (!analyzeCallResult(CCInfo1, InArgs, CalleeAssignFn))
|
if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFn))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
SmallVector<CCValAssign, 16> ArgLocs2;
|
SmallVector<CCValAssign, 16> ArgLocs2;
|
||||||
CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
|
CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
|
||||||
if (!analyzeCallResult(CCInfo2, InArgs, CallerAssignFn))
|
if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFn))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// We need the argument locations to match up exactly. If there's more in
|
// We need the argument locations to match up exactly. If there's more in
|
||||||
|
|
|
@ -130,14 +130,26 @@ struct CallReturnHandler : public IncomingArgHandler {
|
||||||
struct OutgoingArgHandler : public CallLowering::ValueHandler {
|
struct OutgoingArgHandler : public CallLowering::ValueHandler {
|
||||||
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
|
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
|
||||||
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
|
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
|
||||||
CCAssignFn *AssignFnVarArg)
|
CCAssignFn *AssignFnVarArg, bool IsTailCall = false)
|
||||||
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
|
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
|
||||||
AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
|
AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), StackSize(0) {}
|
||||||
|
|
||||||
Register getStackAddress(uint64_t Size, int64_t Offset,
|
Register getStackAddress(uint64_t Size, int64_t Offset,
|
||||||
MachinePointerInfo &MPO) override {
|
MachinePointerInfo &MPO) override {
|
||||||
|
MachineFunction &MF = MIRBuilder.getMF();
|
||||||
LLT p0 = LLT::pointer(0, 64);
|
LLT p0 = LLT::pointer(0, 64);
|
||||||
LLT s64 = LLT::scalar(64);
|
LLT s64 = LLT::scalar(64);
|
||||||
|
|
||||||
|
if (IsTailCall) {
|
||||||
|
// TODO: For -tailcallopt tail calls, Offset will need FPDiff like in
|
||||||
|
// ISelLowering.
|
||||||
|
int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
|
||||||
|
Register FIReg = MRI.createGenericVirtualRegister(p0);
|
||||||
|
MIRBuilder.buildFrameIndex(FIReg, FI);
|
||||||
|
MPO = MachinePointerInfo::getFixedStack(MF, FI);
|
||||||
|
return FIReg;
|
||||||
|
}
|
||||||
|
|
||||||
Register SPReg = MRI.createGenericVirtualRegister(p0);
|
Register SPReg = MRI.createGenericVirtualRegister(p0);
|
||||||
MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
|
MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
|
||||||
|
|
||||||
|
@ -147,7 +159,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
|
||||||
Register AddrReg = MRI.createGenericVirtualRegister(p0);
|
Register AddrReg = MRI.createGenericVirtualRegister(p0);
|
||||||
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
|
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
|
||||||
|
|
||||||
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
|
MPO = MachinePointerInfo::getStack(MF, Offset);
|
||||||
return AddrReg;
|
return AddrReg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,6 +200,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
|
||||||
|
|
||||||
MachineInstrBuilder MIB;
|
MachineInstrBuilder MIB;
|
||||||
CCAssignFn *AssignFnVarArg;
|
CCAssignFn *AssignFnVarArg;
|
||||||
|
bool IsTailCall;
|
||||||
uint64_t StackSize;
|
uint64_t StackSize;
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -378,6 +391,8 @@ bool AArch64CallLowering::lowerFormalArguments(
|
||||||
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
|
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
|
||||||
|
uint64_t StackOffset = Handler.StackUsed;
|
||||||
if (F.isVarArg()) {
|
if (F.isVarArg()) {
|
||||||
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
||||||
if (!Subtarget.isTargetDarwin()) {
|
if (!Subtarget.isTargetDarwin()) {
|
||||||
|
@ -387,14 +402,20 @@ bool AArch64CallLowering::lowerFormalArguments(
|
||||||
}
|
}
|
||||||
|
|
||||||
// We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
|
// We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
|
||||||
uint64_t StackOffset =
|
StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
|
||||||
alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
|
|
||||||
|
|
||||||
auto &MFI = MIRBuilder.getMF().getFrameInfo();
|
auto &MFI = MIRBuilder.getMF().getFrameInfo();
|
||||||
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
|
|
||||||
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
|
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Port checks for stack to restore for -tailcallopt from ISelLowering.
|
||||||
|
// We need to keep track of the size of function stacks for tail call
|
||||||
|
// optimization. When we tail call, we need to check if the callee's arguments
|
||||||
|
// will fit on the caller's stack. So, whenever we lower formal arguments,
|
||||||
|
// we should keep track of this information, since we might lower a tail call
|
||||||
|
// in this function later.
|
||||||
|
FuncInfo->setBytesInStackArgArea(StackOffset);
|
||||||
|
|
||||||
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
|
||||||
if (Subtarget.hasCustomCallingConv())
|
if (Subtarget.hasCustomCallingConv())
|
||||||
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
|
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
|
||||||
|
@ -454,9 +475,67 @@ bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
|
||||||
return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
|
return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decide whether the outgoing arguments in \p OutArgs allow the call
/// described by \p Info to be lowered as a tail call out of \p MF.
///
/// An empty argument list is always accepted. Otherwise the arguments are
/// assigned locations with the assignment function chosen for the callee
/// calling convention, and the call is rejected when that analysis fails,
/// when the assigned stack offset exceeds the value recorded by
/// getBytesInStackArgArea() for the caller, or when any register location
/// is not reported as clobbered by the call-preserved mask of the caller.
///
/// \returns true if none of the checks above rejects the call.
bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
|
||||||
|
CallLoweringInfo &Info, MachineFunction &MF,
|
||||||
|
SmallVectorImpl<ArgInfo> &OutArgs) const {
|
||||||
|
// If there are no outgoing arguments, then we are done.
|
||||||
|
if (OutArgs.empty())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
const Function &CallerF = MF.getFunction();
|
||||||
|
CallingConv::ID CalleeCC = Info.CallConv;
|
||||||
|
CallingConv::ID CallerCC = CallerF.getCallingConv();
|
||||||
|
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
|
||||||
|
|
||||||
|
// We have outgoing arguments. Make sure that we can tail call with them.
|
||||||
|
SmallVector<CCValAssign, 16> OutLocs;
|
||||||
|
CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
|
||||||
|
|
||||||
|
if (!analyzeArgInfo(OutInfo, OutArgs,
|
||||||
|
*TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) {
|
||||||
|
LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure that they can fit on the caller's stack.
|
||||||
|
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
|
||||||
|
if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
|
||||||
|
LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that the parameters in callee-saved registers match.
|
||||||
|
// TODO: Port this over to CallLowering as general code once swiftself is
|
||||||
|
// supported.
|
||||||
|
auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
|
||||||
|
const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
|
||||||
|
|
||||||
|
for (auto &ArgLoc : OutLocs) {
|
||||||
|
// If it's not a register, it's fine.
|
||||||
|
if (!ArgLoc.isRegLoc())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Register Reg = ArgLoc.getLocReg();
|
||||||
|
|
||||||
|
// Only look at callee-saved registers.
|
||||||
|
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// TODO: Port the remainder of this check from TargetLowering to support
|
||||||
|
// tail calling swiftself.
|
||||||
|
LLVM_DEBUG(
|
||||||
|
dbgs()
|
||||||
|
<< "... Cannot handle callee-saved registers in outgoing args yet.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool AArch64CallLowering::isEligibleForTailCallOptimization(
|
bool AArch64CallLowering::isEligibleForTailCallOptimization(
|
||||||
MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
|
MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
|
||||||
SmallVectorImpl<ArgInfo> &InArgs) const {
|
SmallVectorImpl<ArgInfo> &InArgs,
|
||||||
|
SmallVectorImpl<ArgInfo> &OutArgs) const {
|
||||||
CallingConv::ID CalleeCC = Info.CallConv;
|
CallingConv::ID CalleeCC = Info.CallConv;
|
||||||
MachineFunction &MF = MIRBuilder.getMF();
|
MachineFunction &MF = MIRBuilder.getMF();
|
||||||
const Function &CallerF = MF.getFunction();
|
const Function &CallerF = MF.getFunction();
|
||||||
|
@ -535,7 +614,8 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
|
||||||
assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
|
assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
|
||||||
"Unexpected variadic calling convention");
|
"Unexpected variadic calling convention");
|
||||||
|
|
||||||
// Look at the incoming values.
|
// Verify that the incoming and outgoing arguments from the callee are
|
||||||
|
// safe to tail call.
|
||||||
if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
|
if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
|
||||||
LLVM_DEBUG(
|
LLVM_DEBUG(
|
||||||
dbgs()
|
dbgs()
|
||||||
|
@ -543,13 +623,8 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For now, only handle callees that take no arguments.
|
if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
|
||||||
if (!Info.OrigArgs.empty()) {
|
|
||||||
LLVM_DEBUG(
|
|
||||||
dbgs()
|
|
||||||
<< "... Cannot tail call callees with outgoing arguments yet.\n");
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
LLVM_DEBUG(
|
LLVM_DEBUG(
|
||||||
dbgs() << "... Call is eligible for tail call optimization.\n");
|
dbgs() << "... Call is eligible for tail call optimization.\n");
|
||||||
|
@ -592,20 +667,20 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
SmallVector<ArgInfo, 8> SplitArgs;
|
SmallVector<ArgInfo, 8> OutArgs;
|
||||||
for (auto &OrigArg : Info.OrigArgs) {
|
for (auto &OrigArg : Info.OrigArgs) {
|
||||||
splitToValueTypes(OrigArg, SplitArgs, DL, MRI, Info.CallConv);
|
splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
|
||||||
// AAPCS requires that we zero-extend i1 to 8 bits by the caller.
|
// AAPCS requires that we zero-extend i1 to 8 bits by the caller.
|
||||||
if (OrigArg.Ty->isIntegerTy(1))
|
if (OrigArg.Ty->isIntegerTy(1))
|
||||||
SplitArgs.back().Flags[0].setZExt();
|
OutArgs.back().Flags[0].setZExt();
|
||||||
}
|
}
|
||||||
|
|
||||||
SmallVector<ArgInfo, 8> InArgs;
|
SmallVector<ArgInfo, 8> InArgs;
|
||||||
if (!Info.OrigRet.Ty->isVoidTy())
|
if (!Info.OrigRet.Ty->isVoidTy())
|
||||||
splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
|
splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv());
|
||||||
|
|
||||||
bool IsSibCall = Info.IsTailCall &&
|
bool IsSibCall = Info.IsTailCall && isEligibleForTailCallOptimization(
|
||||||
isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs);
|
MIRBuilder, Info, InArgs, OutArgs);
|
||||||
if (IsSibCall)
|
if (IsSibCall)
|
||||||
MF.getFrameInfo().setHasTailCall();
|
MF.getFrameInfo().setHasTailCall();
|
||||||
|
|
||||||
|
@ -655,8 +730,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
|
||||||
// Do the actual argument marshalling.
|
// Do the actual argument marshalling.
|
||||||
SmallVector<unsigned, 8> PhysRegs;
|
SmallVector<unsigned, 8> PhysRegs;
|
||||||
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
|
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
|
||||||
AssignFnVarArg);
|
AssignFnVarArg, IsSibCall);
|
||||||
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
|
if (!handleAssignments(MIRBuilder, OutArgs, Handler))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Now we can add the actual call instruction to the correct basic block.
|
// Now we can add the actual call instruction to the correct basic block.
|
||||||
|
|
|
@ -47,7 +47,8 @@ public:
|
||||||
bool
|
bool
|
||||||
isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
|
isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
|
||||||
CallLoweringInfo &Info,
|
CallLoweringInfo &Info,
|
||||||
SmallVectorImpl<ArgInfo> &InArgs) const;
|
SmallVectorImpl<ArgInfo> &InArgs,
|
||||||
|
SmallVectorImpl<ArgInfo> &OutArgs) const;
|
||||||
|
|
||||||
bool supportSwiftError() const override { return true; }
|
bool supportSwiftError() const override { return true; }
|
||||||
|
|
||||||
|
@ -67,6 +68,10 @@ private:
|
||||||
doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info,
|
doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info,
|
||||||
MachineFunction &MF,
|
MachineFunction &MF,
|
||||||
SmallVectorImpl<ArgInfo> &InArgs) const;
|
SmallVectorImpl<ArgInfo> &InArgs) const;
|
||||||
|
|
||||||
|
bool
|
||||||
|
areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF,
|
||||||
|
SmallVectorImpl<ArgInfo> &OutArgs) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
|
|
@ -24,22 +24,45 @@ define void @indirect_tail_call(void()* %func) {
|
||||||
}
|
}
|
||||||
|
|
||||||
declare void @outgoing_args_fn(i32)
|
declare void @outgoing_args_fn(i32)
|
||||||
; Right now, callees with outgoing arguments should not be tail called.
|
|
||||||
; TODO: Support this.
|
|
||||||
define void @test_outgoing_args(i32 %a) {
|
define void @test_outgoing_args(i32 %a) {
|
||||||
; COMMON-LABEL: name: test_outgoing_args
|
; COMMON-LABEL: name: test_outgoing_args
|
||||||
; COMMON: bb.1 (%ir-block.0):
|
; COMMON: bb.1 (%ir-block.0):
|
||||||
; COMMON: liveins: $w0
|
; COMMON: liveins: $w0
|
||||||
; COMMON: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
; COMMON: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||||
; COMMON: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
|
|
||||||
; COMMON: $w0 = COPY [[COPY]](s32)
|
; COMMON: $w0 = COPY [[COPY]](s32)
|
||||||
; COMMON: BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
|
; COMMON: TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
|
||||||
; COMMON: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
|
|
||||||
; COMMON: RET_ReallyLR
|
|
||||||
tail call void @outgoing_args_fn(i32 %a)
|
tail call void @outgoing_args_fn(i32 %a)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Verify that we create frame indices for memory arguments in tail calls.
|
||||||
|
; We get a bunch of copies here which are unused and thus eliminated. So, let's
|
||||||
|
; just focus on what matters, which is that we get a G_FRAME_INDEX.
|
||||||
|
declare void @outgoing_stack_args_fn(<4 x half>)
|
||||||
|
define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
|
||||||
|
; COMMON-LABEL: name: test_outgoing_stack_args
|
||||||
|
; COMMON: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
|
||||||
|
; COMMON: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
|
||||||
|
; COMMON: $d0 = COPY [[LOAD]](<4 x s16>)
|
||||||
|
; COMMON: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
|
||||||
|
tail call void @outgoing_stack_args_fn(<4 x half> %arg)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Verify that we don't tail call when we cannot fit arguments on the caller's
|
||||||
|
; stack.
|
||||||
|
declare i32 @too_big_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s)
|
||||||
|
define i32 @test_too_big_stack() {
|
||||||
|
; COMMON-LABEL: name: test_too_big_stack
|
||||||
|
; COMMON-NOT: TCRETURNdi
|
||||||
|
; COMMON-NOT: TCRETURNri
|
||||||
|
; COMMON: BL @too_big_stack
|
||||||
|
; COMMON-DAG: RET_ReallyLR
|
||||||
|
entry:
|
||||||
|
%call = tail call i32 @too_big_stack(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
|
||||||
|
ret i32 %call
|
||||||
|
}
|
||||||
|
|
||||||
; Right now, we don't want to tail call callees with nonvoid return types, since
|
; Right now, we don't want to tail call callees with nonvoid return types, since
|
||||||
; call lowering will insert COPYs after the call.
|
; call lowering will insert COPYs after the call.
|
||||||
; TODO: Support this.
|
; TODO: Support this.
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
|
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
|
||||||
|
; RUN: llc -global-isel < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
|
||||||
|
|
||||||
@t = weak global i32 ()* null
|
@t = weak global i32 ()* null
|
||||||
@x = external global i32, align 4
|
@x = external global i32, align 4
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
|
; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
|
||||||
; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
|
; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
|
||||||
; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure
|
; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
|
||||||
; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure
|
; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
|
||||||
; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
|
; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
|
||||||
; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
|
; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
|
||||||
|
|
||||||
define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
|
define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
|
||||||
; CHECK-LABEL: f
|
; CHECK-LABEL: f
|
||||||
|
@ -54,24 +54,13 @@ return: ; preds = %entry, %if.then
|
||||||
; Make sure that for a tail call, taint doesn't get put into SP twice.
|
; Make sure that for a tail call, taint doesn't get put into SP twice.
|
||||||
define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
|
define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
|
||||||
; CHECK-LABEL: tail_caller:
|
; CHECK-LABEL: tail_caller:
|
||||||
; NOGISELSLH: mov [[TMPREG:x[0-9]+]], sp
|
; SLH: mov [[TMPREG:x[0-9]+]], sp
|
||||||
; NOGISELSLH: and [[TMPREG]], [[TMPREG]], x16
|
; SLH: and [[TMPREG]], [[TMPREG]], x16
|
||||||
; NOGISELSLH: mov sp, [[TMPREG]]
|
; SLH: mov sp, [[TMPREG]]
|
||||||
; NOGISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
|
; NOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
|
||||||
; NOGISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
|
; NOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
|
||||||
; NOGISELNOSLH-NOT: mov sp, [[TMPREG]]
|
; NOSLH-NOT: mov sp, [[TMPREG]]
|
||||||
; GISELSLH: mov [[TMPREG:x[0-9]+]], sp
|
; SLH: b tail_callee
|
||||||
; GISELSLH: and [[TMPREG]], [[TMPREG]], x16
|
|
||||||
; GISELSLH: mov sp, [[TMPREG]]
|
|
||||||
; GISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp
|
|
||||||
; GISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16
|
|
||||||
; GISELNOSLH-NOT: mov sp, [[TMPREG]]
|
|
||||||
; GlobalISel doesn't optimize tail calls (yet?), so only check that
|
|
||||||
; cross-call taint register setup code is missing if a tail call was
|
|
||||||
; actually produced.
|
|
||||||
; NOGISELSLH: b tail_callee
|
|
||||||
; GISELSLH: bl tail_callee
|
|
||||||
; GISELSLH: cmp sp, #0
|
|
||||||
; SLH-NOT: cmp sp, #0
|
; SLH-NOT: cmp sp, #0
|
||||||
%call = tail call i32 @tail_callee(i32 %a)
|
%call = tail call i32 @tail_callee(i32 %a)
|
||||||
ret i32 %call
|
ret i32 %call
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
; RUN: llc -relocation-model=static -verify-machineinstrs -O2 < %s | FileCheck %s
|
; RUN: llc -relocation-model=static -verify-machineinstrs -O2 < %s | FileCheck %s
|
||||||
|
; RUN: llc -relocation-model=static -verify-machineinstrs -global-isel -O2 < %s | FileCheck %s
|
||||||
|
|
||||||
; The call to function TestBar should be a tail call, when in C++ the string
|
; The call to function TestBar should be a tail call, when in C++ the string
|
||||||
; `ret` is RVO returned.
|
; `ret` is RVO returned.
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
; RUN: llc -mcpu=cyclone -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s
|
; RUN: llc -mcpu=cyclone -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s --check-prefixes=COMMON,SDAG
|
||||||
|
; RUN: llc -mcpu=cyclone -global-isel -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GISEL
|
||||||
|
|
||||||
; REQUIRES: asserts
|
; REQUIRES: asserts
|
||||||
|
|
||||||
|
@ -20,25 +21,34 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*,
|
||||||
; PR23459 has a test case that we were miscompiling because of this at the
|
; PR23459 has a test case that we were miscompiling because of this at the
|
||||||
; time.
|
; time.
|
||||||
|
|
||||||
; CHECK: Frame Objects
|
; COMMON: Frame Objects
|
||||||
; CHECK: fi#-4: {{.*}} fixed, at location [SP+8]
|
; COMMON: fi#-4: {{.*}} fixed, at location [SP+8]
|
||||||
; CHECK: fi#-3: {{.*}} fixed, at location [SP]
|
; COMMON: fi#-3: {{.*}} fixed, at location [SP]
|
||||||
; CHECK: fi#-2: {{.*}} fixed, at location [SP+8]
|
; COMMON: fi#-2: {{.*}} fixed, at location [SP+8]
|
||||||
; CHECK: fi#-1: {{.*}} fixed, at location [SP]
|
; COMMON: fi#-1: {{.*}} fixed, at location [SP]
|
||||||
|
|
||||||
; CHECK: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3
|
; The order that these appear in differs between GISel and SDAG, but the
|
||||||
; CHECK: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2
|
; dependency relationship still holds.
|
||||||
; CHECK: STRXui %{{.*}}, %fixed-stack.0
|
; COMMON: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3
|
||||||
; CHECK: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
|
; COMMON: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2
|
||||||
|
; SDAG: STRXui %{{.*}}, %fixed-stack.0
|
||||||
|
; SDAG: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
|
||||||
|
; GISEL: STRXui [[VRB]]{{[^,]*}}, %fixed-stack.1
|
||||||
|
; GISEL: STRXui %{{.*}}, %fixed-stack.0
|
||||||
|
|
||||||
; Make sure that there is a dependence edge between fi#-2 and fi#-4.
|
; Make sure that there is a dependence edge between fi#-2 and fi#-4.
|
||||||
; Without this edge the scheduler would be free to move the store across the load.
|
; Without this edge the scheduler would be free to move the store across the load.
|
||||||
|
|
||||||
; CHECK: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
|
; COMMON: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
|
||||||
; CHECK-NOT: SU
|
; COMMON-NOT: SU
|
||||||
; CHECK: Successors:
|
; COMMON: Successors:
|
||||||
; CHECK: SU([[DEPSTOREB:.*]]): Ord Latency=0
|
; COMMON: SU([[DEPSTOREB:.*]]): Ord Latency=0
|
||||||
; CHECK: SU([[DEPSTOREA:.*]]): Ord Latency=0
|
; COMMON: SU([[DEPSTOREA:.*]]): Ord Latency=0
|
||||||
|
|
||||||
; CHECK: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.0
|
; GlobalISel outputs DEPSTOREB before DEPSTOREA, but the dependency relationship
|
||||||
; CHECK: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.1
|
; still holds.
|
||||||
|
; SDAG: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.0
|
||||||
|
; SDAG: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.1
|
||||||
|
|
||||||
|
; GISEL: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.0
|
||||||
|
; GISEL: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.1
|
||||||
|
|
Loading…
Reference in New Issue