diff --git a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp
index 19c76c643141..0f2efb35f334 100644
--- a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -1678,8 +1678,10 @@ SDValue ARM64TargetLowering::LowerFormalArguments(
       int Size = Ins[i].Flags.getByValSize();
       unsigned NumRegs = (Size + 7) / 8;
 
+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should also work for fundamental types too.
       unsigned FrameIdx =
-          MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+          MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
       SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
       InVals.push_back(FrameIdxN);
 
@@ -1737,13 +1739,33 @@ SDValue ARM64TargetLowering::LowerFormalArguments(
     assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
     unsigned ArgOffset = VA.getLocMemOffset();
     unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
-    int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+
+    uint32_t BEAlign = 0;
+    if (ArgSize < 8 && !Subtarget->isLittleEndian())
+      BEAlign = 8 - ArgSize;
+
+    int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
 
     // Create load nodes to retrieve arguments from the stack.
     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-    InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
-                                 MachinePointerInfo::getFixedStack(FI), false,
-                                 false, false, 0));
+    SDValue ArgValue;
+
+    // If the loc type and val type are not the same, create an anyext load.
+    if (VA.getLocVT().getSizeInBits() != VA.getValVT().getSizeInBits()) {
+      // We should only get here if this is a pure integer.
+      assert(!VA.getValVT().isVector() && VA.getValVT().isInteger() &&
+             "Only integer extension supported!");
+      ArgValue = DAG.getExtLoad(ISD::EXTLOAD, DL, VA.getValVT(), Chain, FIN,
+                                MachinePointerInfo::getFixedStack(FI),
+                                VA.getLocVT(),
+                                false, false, false, 0);
+    } else {
+      ArgValue = DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
+                             MachinePointerInfo::getFixedStack(FI), false,
+                             false, false, 0);
+    }
+
+    InVals.push_back(ArgValue);
   }
 }
 
@@ -2089,8 +2111,18 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI,
       // There's no reason we can't support stack args w/ tailcall, but
       // we currently don't, so assert if we see one.
       assert(!IsTailCall && "stack argument with tail call!?");
+
+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should also work for fundamental types too.
+      uint32_t BEAlign = 0;
+      if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+        unsigned OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8;
+        if (OpSize < 8)
+          BEAlign = 8 - OpSize;
+      }
+
       unsigned LocMemOffset = VA.getLocMemOffset();
-      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset + BEAlign);
       PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
 
       if (Outs[i].Flags.isByVal()) {
diff --git a/llvm/test/CodeGen/AArch64/adc.ll b/llvm/test/CodeGen/AArch64/adc.ll
index 01922ede2289..8742e450897c 100644
--- a/llvm/test/CodeGen/AArch64/adc.ll
+++ b/llvm/test/CodeGen/AArch64/adc.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 
 define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
 ; CHECK-LABEL: test_simple:
diff --git a/llvm/test/CodeGen/AArch64/func-argpassing.ll b/llvm/test/CodeGen/AArch64/func-argpassing.ll
index e2109e658f7f..5b3e6c89db6e 100644
--- a/llvm/test/CodeGen/AArch64/func-argpassing.ll
+++ b/llvm/test/CodeGen/AArch64/func-argpassing.ll
@@ -1,9 +1,12 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-AARCH64 --check-prefix=CHECK-LE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE-AARCH64 --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -152,7 +155,7 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var
   %retval = load volatile i32* %stacked
   ret i32 %retval
 ; CHECK-LE: ldr w0, [sp, #16]
-; CHECK-BE: ldr w0, [sp, #20]
+; CHECK-BE-AARCH64: ldr w0, [sp, #20]
 }
 
 define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
@@ -162,8 +165,10 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
   store float %var8, float* @varfloat
 ; Beware as above: the offset would be different on big-endian
 ; machines if the first ldr were changed to use s-registers.
-; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
-; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-AARCH64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-AARCH64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
 
   ret void
 }
@@ -188,7 +193,7 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
 ; Nothing local on stack in current codegen, so first stack is 16 away
 ; CHECK-LE: add x[[REG:[0-9]+]], sp, #16
 ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8]
-; CHECK-BE: ldr {{x[0-9]+}}, [sp, #24]
+; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24]
 
 ; Important point is that we address sp+24 for second dword
 ; CHECK-AARCH64: ldr {{x[0-9]+}}, [sp, #16]
@@ -205,3 +210,14 @@ define i32 @test_extern() {
 ; CHECK: bl memcpy
   ret i32 0
 }
+
+
+; A sub-i32 stack argument must be loaded on big endian with ldr{h,b}, not just
+; implicitly extended to a 32-bit load.
+define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+                        i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+                        i16 %stack1) {
+; CHECK-LABEL: stacked_i16
+; CHECK-ARM64-BE: ldrh
+  ret i16 %stack1
+}
diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll
index 26c705700a61..d216e3239d59 100644
--- a/llvm/test/CodeGen/AArch64/func-calls.ll
+++ b/llvm/test/CodeGen/AArch64/func-calls.ll
@@ -2,9 +2,11 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s
+
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -149,9 +151,9 @@ define void @check_i128_align() {
   call void @check_i128_regalign(i32 0, i128 42)
 ; CHECK-NOT: mov x1
-; CHECK-LE: movz x2, #42
+; CHECK-LE: movz x2, #{{0x2a|42}}
 ; CHECK-LE: mov x3, xzr
-; CHECK-BE: movz x3, #42
+; CHECK-BE: movz {{x|w}}3, #{{0x2a|42}}
 ; CHECK-BE: mov x2, xzr
 
 ; CHECK: bl check_i128_regalign
diff --git a/llvm/test/CodeGen/AArch64/mul-lohi.ll b/llvm/test/CodeGen/AArch64/mul-lohi.ll
index e9493efe8fd0..3b027f2d4f10 100644
--- a/llvm/test/CodeGen/AArch64/mul-lohi.ll
+++ b/llvm/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
 ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
 
 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:
diff --git a/llvm/test/CodeGen/ARM64/aapcs.ll b/llvm/test/CodeGen/ARM64/aapcs.ll
index bd206a48b732..b713f0d5a531 100644
--- a/llvm/test/CodeGen/ARM64/aapcs.ll
+++ b/llvm/test/CodeGen/ARM64/aapcs.ll
@@ -21,7 +21,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
 
   %ext_bool = zext i1 %bool to i64
   store volatile i64 %ext_bool, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp]
+; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
 ; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
 ; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
 
@@ -37,7 +37,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
 
   %ext_int = zext i32 %int to i64
   store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24]
+; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
 ; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
 
   store volatile i64 %long, i64* @var64, align 8