From 85874a9360334ddb9619aca6344b8ee53296fa1e Mon Sep 17 00:00:00 2001 From: Javed Absar Date: Thu, 13 Oct 2016 14:57:43 +0000 Subject: [PATCH] [ARM]: Assign cost of scaling used in addressing mode for ARM cores This patch assigns a cost to the scaling used in addressing modes. On many ARM cores, a negated register offset takes longer than a non-negated register offset in a register-offset addressing mode. For instance: (1) LDR R0, [R1, R2 LSL #2] (2) LDR R0, [R1, -R2 LSL #2] Above, (1) takes fewer cycles than (2). By assigning an appropriate scaling factor cost, we enable LLVM to make the right trade-offs in the optimization and code-selection phases. Differential Revision: http://reviews.llvm.org/D24857 Reviewers: jmolloy, rengolin llvm-svn: 284127 --- llvm/lib/Target/ARM/ARM.td | 8 ++++++-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 11 +++++++++++ llvm/lib/Target/ARM/ARMISelLowering.h | 8 ++++++++ llvm/lib/Target/ARM/ARMSubtarget.h | 4 ++++ llvm/test/CodeGen/ARM/lsr-scale-addr-mode.ll | 6 ++++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 240306e71592..d0da6d8ec4ac 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -99,6 +99,8 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", // Not to be confused with FeatureHasRetAddrStack (return address stack) def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable Reliability, Availability and Serviceability extensions">; +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of positive address offsets">; // Cyclone has preferred instructions for zeroing VFP registers, which can @@ -773,13 +775,15 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, FeatureHWDiv, FeatureHWDivARM, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureFPAO]>; def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, FeatureHWDiv, FeatureHWDivARM, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureFPAO]>; 
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, FeatureHWDiv, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 7623841a8e34..a41c4fcb9cd0 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -11612,6 +11612,17 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { return true; } +int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + if (isLegalAddressingMode(DL, AM, Ty, AS)) { + if (Subtarget->hasFPAO()) + return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster + return 0; + } + return -1; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 191235d491dc..75da35843379 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -291,6 +291,14 @@ namespace llvm { /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + + /// getScalingFactorCost - Return the cost of the scaling used in + /// addressing mode represented by AM. + /// If the AM is supported, the return value must be >= 0. + /// If the AM is not supported, the return value must be negative. 
+ int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; + bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 04e5ff288ba9..cf53831defe1 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -235,6 +235,9 @@ protected: /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing = false; + /// HasFPAO - if true, processor does positive address offset computation faster + bool HasFPAO = false; + /// If true, if conversion may decide to leave some instructions unpredicated. bool IsProfitableToUnpredicate = false; @@ -453,6 +456,7 @@ public: bool hasTrustZone() const { return HasTrustZone; } bool has8MSecExt() const { return Has8MSecExt; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool hasFPAO() const { return HasFPAO; } bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } bool hasSlowVDUP32() const { return HasSlowVDUP32; } diff --git a/llvm/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/llvm/test/CodeGen/ARM/lsr-scale-addr-mode.ll index 063ef76d95c9..d8c979c8cd65 100644 --- a/llvm/test/CodeGen/ARM/lsr-scale-addr-mode.ll +++ b/llvm/test/CodeGen/ARM/lsr-scale-addr-mode.ll @@ -1,6 +1,10 @@ ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s ; Should use scaled addressing mode. 
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a53 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A53 +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a57 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A57 +; Should not generate negated register offset + define void @sintzero(i32* %a) nounwind { entry: store i32 0, i32* %a @@ -19,4 +23,6 @@ return: ; preds = %cond_next } ; CHECK: lsl{{.*}}#2] +; CHECK-NONEGOFF-A53: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2] +; CHECK-NONEGOFF-A57: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2]