From d55e115b58addbd13c12bbe451ed66ad57a37387 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Wed, 5 Mar 2014 15:25:27 +0000 Subject: [PATCH] ARM: Correctly align arguments after a byval struct is passed on the stack llvm-svn: 202985 --- llvm/include/llvm/CodeGen/CallingConvLower.h | 5 + llvm/lib/Target/ARM/ARMFrameLowering.cpp | 25 ++-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 129 ++++++++++++------ llvm/lib/Target/ARM/ARMISelLowering.h | 5 +- llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 24 ++-- .../2013-04-05-Small-ByVal-Structs-PR15293.ll | 4 +- ...013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll | 6 +- .../2014-02-21-byval-reg-split-alignment.ll | 114 ++++++++++++++++ 8 files changed, 245 insertions(+), 67 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 4bc8cecaff82..25cff38b2d58 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -404,6 +404,11 @@ public: ByValRegs.clear(); } + // Rewind byval registers tracking info. + void rewindByValRegsInfo() { + InRegsParamsProceed = 0; + } + ParmContext getCallOrPrologue() const { return CallOrPrologue; } private: diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 340f49ffeadc..274a125126ad 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -175,7 +175,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - // Allocate the vararg register save area. This is not counted in NumBytes. + // Allocate the vararg register save area. 
if (ArgRegsSaveSize) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); @@ -188,13 +188,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } if (!AFI->hasStackFrame()) { - if (NumBytes != 0) { - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + if (NumBytes - ArgRegsSaveSize != 0) { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); MCSymbol *SPLabel = Context.CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(SPLabel); - CFAOffset -= NumBytes; + CFAOffset -= NumBytes - ArgRegsSaveSize; MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(SPLabel, CFAOffset)); } @@ -246,12 +246,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Determine starting offsets of spill areas. bool HasFP = hasFP(MF); - unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size + + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; + FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + + GPRCS1Size + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -339,7 +341,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::LR: MMI.addFrameInst(MCCFIInstruction::createOffset(SPLabel, MRI->getDwarfRegNum(Reg, true), - MFI->getObjectOffset(FI) - ArgRegsSaveSize)); + MFI->getObjectOffset(FI))); break; } } @@ -390,7 +392,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R12: if (STI.isTargetMachO()) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned Offset = MFI->getObjectOffset(FI) - ArgRegsSaveSize; + unsigned Offset = 
MFI->getObjectOffset(FI); MMI.addFrameInst( MCCFIInstruction::createOffset(SPLabel, DwarfReg, Offset)); } @@ -536,8 +538,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, return; if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + if (NumBytes - ArgRegsSaveSize != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); @@ -550,7 +552,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, } // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + NumBytes -= (ArgRegsSaveSize + + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index ebcc2aa9dd57..691961ef2afa 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1826,22 +1826,6 @@ ARMTargetLowering::HandleByVal( State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - // For in-prologue parameters handling, we also introduce stack offset - // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. - // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how - // NSAA should be evaluted (NSAA means "next stacked argument address"). - // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. - // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs. 
- unsigned NSAAOffset = State->getNextStackOffset(); - if (State->getCallOrPrologue() != Call) { - for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { - unsigned RB, RE; - State->getInRegsParamInfo(i, RB, RE); - assert(NSAAOffset >= (RE-RB)*4 && - "Stack offset for byval regs doesn't introduced anymore?"); - NSAAOffset -= (RE-RB)*4; - } - } if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; @@ -1856,6 +1840,7 @@ ARMTargetLowering::HandleByVal( // all remained GPR regs. In that case we can't split parameter, we must // send it to stack. We also must set NCRN to R4, so waste all // remained registers. + const unsigned NSAAOffset = State->getNextStackOffset(); if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { while (State->AllocateReg(GPRArgRegs, 4)) ; @@ -1875,18 +1860,14 @@ ARMTargetLowering::HandleByVal( // allocate remained amount of registers we need. for (unsigned i = reg+1; i != ByValRegEnd; ++i) State->AllocateReg(GPRArgRegs, 4); - // At a call site, a byval parameter that is split between - // registers and memory needs its size truncated here. In a - // function prologue, such byval parameters are reassembled in - // memory, and are not truncated. - if (State->getCallOrPrologue() == Call) { - // Make remained size equal to 0 in case, when - // the whole structure may be stored into registers. - if (size < excess) - size = 0; - else - size -= excess; - } + // A byval parameter that is split between registers and memory needs its + // size truncated here. + // In the case where the entire structure fits in registers, we set the + // size in memory to zero. 
+ if (size < excess) + size = 0; + else + size -= excess; } } } @@ -2794,7 +2775,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, unsigned OffsetFromOrigArg, unsigned ArgOffset, unsigned ArgSize, - bool ForceMutable) const { + bool ForceMutable, + unsigned ByValStoreOffset, + unsigned TotalArgRegsSaveSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. @@ -2831,7 +2814,6 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, // Note: once stack area for byval/varargs registers // was initialized, it can't be initialized again. if (ArgRegsSaveSize) { - unsigned Padding = ArgRegsSaveSize - ArgRegsSize; if (Padding) { @@ -2840,11 +2822,18 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setStoredByValParamsPadding(Padding); } - int FrameIndex = MFI->CreateFixedObject( - ArgRegsSaveSize, - Padding + ArgOffset, - false); + int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize, + Padding + + ByValStoreOffset - + (int64_t)TotalArgRegsSaveSize, + false); SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); + if (Padding) { + MFI->CreateFixedObject(Padding, + ArgOffset + ByValStoreOffset - + (int64_t)ArgRegsSaveSize, + false); + } SmallVector MemOps; for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; @@ -2872,10 +2861,16 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); return FrameIndex; - } else + } else { + if (ArgSize == 0) { + // We cannot allocate a zero-byte object for the first variadic argument, + // so just make up a size. + ArgSize = 4; + } // This will point to the next argument passed via stack. return MFI->CreateFixedObject( - 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); + ArgSize, ArgOffset, !ForceMutable); + } } // Setup stack frame, the va_list pointer will start from. 
@@ -2883,6 +2878,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -2894,7 +2890,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // argument passed via stack. int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable); + 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2931,6 +2927,51 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); + unsigned ByValStoreOffset = 0; + unsigned TotalArgRegsSaveSize = 0; + unsigned ArgRegsSaveSizeMaxAlign = 4; + + // Calculate the amount of stack space that we need to allocate to store + // byval and variadic arguments that are passed in registers. + // We need to know this before we allocate the first byval or variadic + // argument, as they will be allocated a stack slot below the CFA (Canonical + // Frame Address, the stack pointer at entry to the function). 
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + int index = VA.getValNo(); + if (index != lastInsIndex) { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + if (Flags.isByVal()) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), + Flags.getByValSize(), + ExtraArgRegsSize, ExtraArgRegsSaveSize); + + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) + ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); + CCInfo.nextInRegsParam(); + } + lastInsIndex = index; + } + } + } + CCInfo.rewindByValRegsInfo(); + lastInsIndex = -1; + if (isVarArg) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, + ExtraArgRegsSize, ExtraArgRegsSaveSize); + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + } + // If the arg regs save area contains N-byte aligned values, the + // bottom of it must be at least N-byte aligned. + TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); + TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -3029,18 +3070,23 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // a tail call. 
if (Flags.isByVal()) { unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + + ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); int FrameIndex = StoreByValRegs( CCInfo, DAG, dl, Chain, CurOrigArg, CurByValIndex, Ins[VA.getValNo()].PartOffset, VA.getLocMemOffset(), Flags.getByValSize(), - true /*force mutable frames*/); + true /*force mutable frames*/, + ByValStoreOffset, + TotalArgRegsSaveSize); + ByValStoreOffset += Flags.getByValSize(); + ByValStoreOffset = std::min(ByValStoreOffset, 16U); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { - unsigned FIOffset = VA.getLocMemOffset() + - AFI->getStoredByValParamsPadding(); + unsigned FIOffset = VA.getLocMemOffset(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, FIOffset, true); @@ -3058,7 +3104,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, - CCInfo.getNextStackOffset()); + CCInfo.getNextStackOffset(), + TotalArgRegsSaveSize); return Chain; } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index bae23a69b596..f463dfcca656 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -492,11 +492,14 @@ namespace llvm { unsigned OffsetFromOrigArg, unsigned ArgOffset, unsigned ArgSize, - bool ForceMutable) const; + bool ForceMutable, + unsigned ByValStoreOffset, + unsigned TotalArgRegsSaveSize) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 996428dbeb12..009104e6149c 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ 
b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -94,6 +94,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); + assert(NumBytes >= ArgRegsSaveSize && + "ArgRegsSaveSize is included in NumBytes"); const std::vector &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -121,13 +123,13 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { } if (!AFI->hasStackFrame()) { - if (NumBytes != 0) { - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + if (NumBytes - ArgRegsSaveSize != 0) { + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(SPLabel); - CFAOffset -= NumBytes; + CFAOffset -= NumBytes - ArgRegsSaveSize; MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(SPLabel, CFAOffset)); } @@ -168,7 +170,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { } // Determine starting offsets of spill areas. 
- unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); @@ -219,7 +221,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::LR: MMI.addFrameInst(MCCFIInstruction::createOffset(SPLabel, MRI->getDwarfRegNum(Reg, true), - MFI->getObjectOffset(FI) - ArgRegsSaveSize)); + MFI->getObjectOffset(FI))); break; } } @@ -227,7 +229,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; + FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); @@ -324,12 +327,14 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); + assert((unsigned)NumBytes >= ArgRegsSaveSize && + "ArgRegsSaveSize is included in NumBytes"); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); + if (NumBytes - ArgRegsSaveSize != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { @@ -343,7 +348,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // Move SP to start of FP callee save spill area.
NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize()); + AFI->getDPRCalleeSavedAreaSize() + + ArgRegsSaveSize); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; diff --git a/llvm/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/llvm/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll index 127429bc31e3..c5eba7d4773c 100644 --- a/llvm/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll +++ b/llvm/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll @@ -4,8 +4,8 @@ ;CHECK-LABEL: foo: ;CHECK: sub sp, sp, #8 ;CHECK: push {r11, lr} -;CHECK: str r0, [sp, #8] -;CHECK: add r0, sp, #8 +;CHECK: str r0, [sp, #12] +;CHECK: add r0, sp, #12 ;CHECK: bl fooUseParam ;CHECK: pop {r11, lr} ;CHECK: add sp, sp, #8 diff --git a/llvm/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/llvm/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll index 6db71fed958e..e79a3ba741ec 100644 --- a/llvm/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll +++ b/llvm/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll @@ -23,9 +23,9 @@ define void @foo(double %vfp0, ; --> D0, NSAA=SP entry: ;CHECK: sub sp, #8 ;CHECK: push.w {r11, lr} - ;CHECK: add r0, sp, #16 - ;CHECK: str r2, [sp, #20] - ;CHECK: str r1, [sp, #16] + ;CHECK: add r0, sp, #8 + ;CHECK: str r2, [sp, #12] + ;CHECK: str r1, [sp, #8] ;CHECK: bl fooUseStruct call void @fooUseStruct(%st_t* %p1) ret void diff --git a/llvm/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll b/llvm/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll new file mode 100644 index 000000000000..054a45ced1d0 --- /dev/null +++ b/llvm/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll @@ -0,0 +1,114 @@ +; RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s + +%struct4bytes = type { i32 } +%struct8bytes8align = type { i64 } +%struct12bytes = type { i32, i32, i32 } + +declare 
void @useIntPtr(%struct4bytes*) +declare void @useLong(i64) +declare void @usePtr(%struct8bytes8align*) + +; a -> r0 +; b -> r1..r3 +; c -> sp+0..sp+7 +define void @foo1(i32 %a, %struct12bytes* byval %b, i64 %c) { +; CHECK-LABEL: foo1 +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; CHECK: add [[SCRATCH:r[0-9]+]], sp, #12 +; CHECK: stm [[SCRATCH]], {r1, r2, r3} +; CHECK: ldr r0, [sp, #24] +; CHECK: ldr r1, [sp, #28] +; CHECK: bl useLong +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 + + tail call void @useLong(i64 %c) + ret void +} + +; a -> r0 +; b -> r2..r3 +define void @foo2(i32 %a, %struct8bytes8align* byval %b) { +; CHECK-LABEL: foo2 +; CHECK: sub sp, sp, #8 +; CHECK: push {r11, lr} +; CHECK: add r0, sp, #8 +; CHECK: str r3, [sp, #12] +; CHECK: str r2, [sp, #8] +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #8 + + tail call void @usePtr(%struct8bytes8align* %b) + ret void +} + +; a -> r0..r1 +; b -> r2 +define void @foo3(%struct8bytes8align* byval %a, %struct4bytes* byval %b) { +; CHECK-LABEL: foo3 +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; CHECK: add [[SCRATCH:r[0-9]+]], sp, #8 +; CHECK: stm [[SCRATCH]], {r0, r1, r2} +; CHECK: add r0, sp, #8 +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 + + tail call void @usePtr(%struct8bytes8align* %a) + ret void +} + +; a -> r0 +; b -> r2..r3 +define void @foo4(%struct4bytes* byval %a, %struct8bytes8align* byval %b) { +; CHECK-LABEL: foo4 +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; CHECK: str r0, [sp, #8] +; CHECK: add r0, sp, #16 +; CHECK: str r3, [sp, #20] +; CHECK: str r2, [sp, #16] +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 +; CHECK: mov pc, lr + + tail call void @usePtr(%struct8bytes8align* %b) + ret void +} + +; a -> r0..r1 +; b -> r2 +; c -> r3 +define void @foo5(%struct8bytes8align* byval %a, %struct4bytes* byval %b, %struct4bytes* byval %c) { +; CHECK-LABEL: foo5 +; CHECK: sub sp, sp, #16 +; CHECK: push {r11, lr} +; 
CHECK: add [[SCRATCH:r[0-9]+]], sp, #8 +; CHECK: stm [[SCRATCH]], {r0, r1, r2, r3} +; CHECK: add r0, sp, #8 +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: add sp, sp, #16 +; CHECK: mov pc, lr + + tail call void @usePtr(%struct8bytes8align* %a) + ret void +} + +; a..c -> r0..r2 +; d -> sp+0..sp+7 +define void @foo6(i32 %a, i32 %b, i32 %c, %struct8bytes8align* byval %d) { +; CHECK-LABEL: foo6 +; CHECK: push {r11, lr} +; CHECK: add r0, sp, #8 +; CHECK: bl usePtr +; CHECK: pop {r11, lr} +; CHECK: mov pc, lr + + tail call void @usePtr(%struct8bytes8align* %d) + ret void +}