From ccc7f982c145bc85b2d671117d3ef8f818de6896 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Wed, 7 May 2014 11:28:36 +0000 Subject: [PATCH] [ARM64-BE] Make big endian (scalar) argument passing work correctly. This completes the port of r204814 (cpirker "AArch64_BE function argument passing for ARM ABI") from AArch64 to ARM64, and fixes a bunch of issues found during later development along the way. The biggest of these was that the alignment fixup logic wasn't replicated into all the places it should have been. llvm-svn: 208192 --- llvm/lib/Target/ARM64/ARM64ISelLowering.cpp | 44 +++++++++++++++++--- llvm/test/CodeGen/AArch64/adc.ll | 1 + llvm/test/CodeGen/AArch64/func-argpassing.ll | 26 +++++++++--- llvm/test/CodeGen/AArch64/func-calls.ll | 6 ++- llvm/test/CodeGen/AArch64/mul-lohi.ll | 1 + llvm/test/CodeGen/ARM64/aapcs.ll | 4 +- 6 files changed, 67 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp index 19c76c643141..0f2efb35f334 100644 --- a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -1678,8 +1678,10 @@ SDValue ARM64TargetLowering::LowerFormalArguments( int Size = Ins[i].Flags.getByValSize(); unsigned NumRegs = (Size + 7) / 8; + // FIXME: This works on big-endian for composite byvals, which are the common + // case. It should also work for fundamental types too. unsigned FrameIdx = - MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); + MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); InVals.push_back(FrameIdxN); @@ -1737,13 +1739,33 @@ SDValue ARM64TargetLowering::LowerFormalArguments( assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); unsigned ArgOffset = VA.getLocMemOffset(); unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; - int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true); + + uint32_t BEAlign = 0; + if (ArgSize < 8 && !Subtarget->isLittleEndian()) + BEAlign = 8 - ArgSize; + + int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), false, - false, false, 0)); + SDValue ArgValue; + + // If the loc type and val type are not the same, create an anyext load. + if (VA.getLocVT().getSizeInBits() != VA.getValVT().getSizeInBits()) { + // We should only get here if this is a pure integer. + assert(!VA.getValVT().isVector() && VA.getValVT().isInteger() && + "Only integer extension supported!"); + ArgValue = DAG.getExtLoad(ISD::EXTLOAD, DL, VA.getValVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + VA.getLocVT(), + false, false, false, 0); + } else { + ArgValue = DAG.getLoad(VA.getValVT(), DL, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), false, + false, false, 0); + } + + InVals.push_back(ArgValue); } } @@ -2089,8 +2111,18 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI, // There's no reason we can't support stack args w/ tailcall, but // we currently don't, so assert if we see one. assert(!IsTailCall && "stack argument with tail call!?"); + + // FIXME: This works on big-endian for composite byvals, which are the common + // case. It should also work for fundamental types too. + uint32_t BEAlign = 0; + if (!Subtarget->isLittleEndian() && !Flags.isByVal()) { + unsigned OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8; + if (OpSize < 8) + BEAlign = 8 - OpSize; + } + unsigned LocMemOffset = VA.getLocMemOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset + BEAlign); PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); if (Outs[i].Flags.isByVal()) { diff --git a/llvm/test/CodeGen/AArch64/adc.ll b/llvm/test/CodeGen/AArch64/adc.ll index 01922ede2289..8742e450897c 100644 --- a/llvm/test/CodeGen/AArch64/adc.ll +++ b/llvm/test/CodeGen/AArch64/adc.ll @@ -1,6 +1,7 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s define i128 @test_simple(i128 %a, i128 %b, i128 %c) { ; CHECK-LABEL: test_simple: diff --git a/llvm/test/CodeGen/AArch64/func-argpassing.ll b/llvm/test/CodeGen/AArch64/func-argpassing.ll index e2109e658f7f..5b3e6c89db6e 100644 --- a/llvm/test/CodeGen/AArch64/func-argpassing.ll +++ b/llvm/test/CodeGen/AArch64/func-argpassing.ll @@ -1,9 +1,12 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-AARCH64 --check-prefix=CHECK-LE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE-AARCH64 --check-prefix=CHECK-BE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s + ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } @@ -152,7 +155,7 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var %retval = load volatile i32* %stacked ret i32 %retval ; CHECK-LE: ldr w0, [sp, #16] -; CHECK-BE: ldr w0, [sp, #20] +; CHECK-BE-AARCH64: ldr w0, [sp, #20] } define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, @@ -162,8 +165,10 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, store float %var8, float* @varfloat ; Beware as above: the offset would be different on big-endian ; machines if the first ldr were changed to use s-registers. -; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] -; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] +; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] +; CHECK-AARCH64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] +; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] +; CHECK-AARCH64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] ret void } @@ -188,7 +193,7 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ; Nothing local on stack in current codegen, so first stack is 16 away ; CHECK-LE: add x[[REG:[0-9]+]], sp, #16 ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8] -; CHECK-BE: ldr {{x[0-9]+}}, [sp, #24] +; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24] ; Important point is that we address sp+24 for second dword ; CHECK-AARCH64: ldr {{x[0-9]+}}, [sp, #16] @@ -205,3 +210,14 @@ define i32 @test_extern() { ; CHECK: bl memcpy ret i32 0 } + + +; A sub-i32 stack argument must be loaded on big endian with ldr{h,b}, not just +; implicitly extended to a 32-bit load. +define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3, + i32 %val4, i32 %val5, i32 %val6, i32 %val7, + i16 %stack1) { +; CHECK-LABEL: stacked_i16 +; CHECK-ARM64-BE: ldrh + ret i16 %stack1 +} diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll index 26c705700a61..d216e3239d59 100644 --- a/llvm/test/CodeGen/AArch64/func-calls.ll +++ b/llvm/test/CodeGen/AArch64/func-calls.ll @@ -2,9 +2,11 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s + ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s %myStruct = type { i64 , i8, i32 } @@ -149,9 +151,9 @@ define void @check_i128_align() { call void @check_i128_regalign(i32 0, i128 42) ; CHECK-NOT: mov x1 -; CHECK-LE: movz x2, #42 +; CHECK-LE: movz x2, #{{0x2a|42}} ; CHECK-LE: mov x3, xzr -; CHECK-BE: movz x3, #42 +; CHECK-BE: movz {{x|w}}3, #{{0x2a|42}} ; CHECK-BE: mov x2, xzr ; CHECK: bl check_i128_regalign diff --git a/llvm/test/CodeGen/AArch64/mul-lohi.ll b/llvm/test/CodeGen/AArch64/mul-lohi.ll index e9493efe8fd0..3b027f2d4f10 100644 --- a/llvm/test/CodeGen/AArch64/mul-lohi.ll +++ b/llvm/test/CodeGen/AArch64/mul-lohi.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s ; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s define i128 @test_128bitmul(i128 %lhs, i128 %rhs) { ; CHECK-LABEL: test_128bitmul: diff --git a/llvm/test/CodeGen/ARM64/aapcs.ll b/llvm/test/CodeGen/ARM64/aapcs.ll index bd206a48b732..b713f0d5a531 100644 --- a/llvm/test/CodeGen/ARM64/aapcs.ll +++ b/llvm/test/CodeGen/ARM64/aapcs.ll @@ -21,7 +21,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, %ext_bool = zext i1 %bool to i64 store volatile i64 %ext_bool, i64* @var64, align 8 -; CHECK: ldr w[[EXT:[0-9]+]], [sp] +; CHECK: ldrb w[[EXT:[0-9]+]], [sp] ; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1 ; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64] @@ -37,7 +37,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, %ext_int = zext i32 %int to i64 store volatile i64 %ext_int, i64* @var64, align 8 -; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24] +; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24] ; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] store volatile i64 %long, i64* @var64, align 8