[ARM]: Assign cost of scaling used in addressing mode for ARM cores

This patch assigns cost of the scaling used in addressing.
On many ARM cores, a negated register offset takes longer than a
non-negated register offset, in a register-offset addressing mode.

For instance:

LDR R0, [R1, R2 LSL #2]
LDR R0, [R1, -R2 LSL #2]

Above, (1) takes less cycles than (2).

By assigning appropriate scaling factor cost, we enable the LLVM
to make the right trade-offs in the optimization and code-selection phase.

Differential Revision: http://reviews.llvm.org/D24857

Reviewers: jmolloy, rengolin
llvm-svn: 284127
This commit is contained in:
Javed Absar 2016-10-13 14:57:43 +00:00
parent 1d4b163fc0
commit 85874a9360
5 changed files with 35 additions and 2 deletions

View File

@ -99,6 +99,8 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
// Not to be confused with FeatureHasRetAddrStack (return address stack)
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable Reliability, Availability and Serviceability extensions">;
def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
"Enable fast computation of positive address offsets">;
// Cyclone has preferred instructions for zeroing VFP registers, which can
@ -773,13 +775,15 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
FeatureHWDiv,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
FeatureHWDiv,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureHWDiv,

View File

@ -11612,6 +11612,17 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
return true;
}
int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
if (isLegalAddressingMode(DL, AM, Ty, AS)) {
if (Subtarget->hasFPAO())
return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
return 0;
}
return -1;
}
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)

View File

@ -291,6 +291,14 @@ namespace llvm {
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
/// getScalingFactorCost - Return the cost of the scaling used in
/// addressing mode represented by AM.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, the return value must be negative.
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal

View File

@ -235,6 +235,9 @@ protected:
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing = false;
/// HasFPAO - if true, processor does positive address offset computation faster
bool HasFPAO = false;
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@ -453,6 +456,7 @@ public:
bool hasTrustZone() const { return HasTrustZone; }
bool has8MSecExt() const { return Has8MSecExt; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
bool hasFPAO() const { return HasFPAO; }
bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
bool hasSlowVDUP32() const { return HasSlowVDUP32; }

View File

@ -1,6 +1,10 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
; Should use scaled addressing mode.
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a53 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A53
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a57 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A57
; Should not generate negated register offset
define void @sintzero(i32* %a) nounwind {
entry:
store i32 0, i32* %a
@ -19,4 +23,6 @@ return: ; preds = %cond_next
}
; CHECK: lsl{{.*}}#2]
; CHECK-NONEGOFF-A53: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2]
; CHECK-NONEGOFF-A57: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2]