[AArch64] Add new target feature to fuse literal generation

This feature enables the fusion of literal generation operations on
Cortex-A57, as recommended in its Software Optimisation Guide, sections
4.14 and 4.15.

Differential revision: https://reviews.llvm.org/D28698

llvm-svn: 293739
This commit is contained in:
Evandro Menezes 2017-02-01 02:54:42 +00:00
parent b21fb29c26
commit 455382ea22
4 changed files with 46 additions and 0 deletions

View File

@ -107,6 +107,10 @@ def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
// Subtarget feature "fuse-literals": names the HasFuseLiterals subtarget
// attribute with the value "true". The last string is the feature's
// human-readable description.
def FeatureFuseLiterals : SubtargetFeature<
"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
// Subtarget feature "disable-latency-sched-heuristic": names the
// DisableLatencySchedHeuristic subtarget attribute with the value "true".
// The last string is the feature's human-readable description.
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
@ -189,6 +193,7 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureFuseAES,
FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,

View File

@ -129,6 +129,31 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
}
if (ST.hasFuseLiterals())
// Fuse literal generation operations.
switch (FirstOpcode) {
// PC relative address.
case AArch64::ADRP:
return SecondOpcode == AArch64::ADDXri ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
// 32 bit immediate.
case AArch64::MOVZWi:
return (SecondOpcode == AArch64::MOVKWi &&
Second->getOperand(3).getImm() == 16) ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
// Lower half of 64 bit immediate.
case AArch64::MOVZXi:
return (SecondOpcode == AArch64::MOVKXi &&
Second->getOperand(3).getImm() == 16) ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
// Upper half of 64 bit immediate.
case AArch64::MOVKXi:
return First->getOperand(3).getImm() == 32 &&
((SecondOpcode == AArch64::MOVKXi &&
Second->getOperand(3).getImm() == 48) ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END);
}
return false;
}

View File

@ -85,6 +85,7 @@ protected:
// Per-subtarget tuning flags. Every boolean below defaults to false.
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
// Read through hasFuseAES().
bool HasFuseAES = false;
// Read through hasFuseLiterals(); named by the "fuse-literals" feature.
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
// Read through getMaxInterleaveFactor(); defaults to 2.
uint8_t MaxInterleaveFactor = 2;
@ -199,6 +200,7 @@ public:
/// Returns the HasArithmeticBccFusion flag.
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
/// Returns the HasArithmeticCbzFusion flag.
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
/// Returns the HasFuseAES flag.
bool hasFuseAES() const { return HasFuseAES; }
/// Returns the HasFuseLiterals flag.
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Returns the UseRSqrt flag.
bool useRSqrt() const { return UseRSqrt; }
/// Returns MaxInterleaveFactor, widened from uint8_t to unsigned.
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {

View File

@ -330,6 +330,20 @@ public:
return DAG;
}
/// Creates the post-RA machine scheduler for subtargets that fuse literal
/// generation operations. Returns nullptr for every other subtarget.
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
  const AArch64Subtarget &Subtarget =
      C->MF->getSubtarget<AArch64Subtarget>();
  if (!Subtarget.hasFuseLiterals())
    return nullptr;

  // Literals are expanded from pseudos only after register allocation
  // (see addPreSched2()), so the macro fusion mutation has to be applied
  // again on the post-RA scheduling DAG.
  ScheduleDAGMI *PostRADAG = createGenericSchedPostRA(C);
  PostRADAG->addMutation(createAArch64MacroFusionDAGMutation());
  return PostRADAG;
}
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;