[AArch64] Add new target feature to fuse literal generation
This feature enables the fusion of literal generation operations on Cortex-A57, as recommended in sections 4.14 and 4.15 of its Software Optimisation Guide. Differential revision: https://reviews.llvm.org/D28698 llvm-svn: 293739
This commit is contained in:
parent
b21fb29c26
commit
455382ea22
|
@ -107,6 +107,10 @@ def FeatureFuseAES : SubtargetFeature<
|
|||
"fuse-aes", "HasFuseAES", "true",
|
||||
"CPU fuses AES crypto operations">;
|
||||
|
||||
// Subtarget feature named "fuse-literals"; it sets the HasFuseLiterals
// subtarget field to "true" for CPUs that fuse literal generation operations.
def FeatureFuseLiterals : SubtargetFeature<
|
||||
"fuse-literals", "HasFuseLiterals", "true",
|
||||
"CPU fuses literal generation operations">;
|
||||
|
||||
// Subtarget feature named "disable-latency-sched-heuristic"; it sets the
// DisableLatencySchedHeuristic subtarget field to "true".
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
|
||||
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
|
||||
"Disable latency scheduling heuristic">;
|
||||
|
@ -189,6 +193,7 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
|
|||
FeatureCustomCheapAsMoveHandling,
|
||||
FeatureFPARMv8,
|
||||
FeatureFuseAES,
|
||||
FeatureFuseLiterals,
|
||||
FeatureNEON,
|
||||
FeaturePerfMon,
|
||||
FeaturePostRAScheduler,
|
||||
|
|
|
@ -129,6 +129,31 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
|
|||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
|
||||
}
|
||||
|
||||
if (ST.hasFuseLiterals())
|
||||
// Fuse literal generation operations.
|
||||
switch (FirstOpcode) {
|
||||
// PC relative address.
|
||||
case AArch64::ADRP:
|
||||
return SecondOpcode == AArch64::ADDXri ||
|
||||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
|
||||
// 32 bit immediate.
|
||||
case AArch64::MOVZWi:
|
||||
return (SecondOpcode == AArch64::MOVKWi &&
|
||||
Second->getOperand(3).getImm() == 16) ||
|
||||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
|
||||
// Lower half of 64 bit immediate.
|
||||
case AArch64::MOVZXi:
|
||||
return (SecondOpcode == AArch64::MOVKXi &&
|
||||
Second->getOperand(3).getImm() == 16) ||
|
||||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
|
||||
// Upper half of 64 bit immediate.
|
||||
case AArch64::MOVKXi:
|
||||
return First->getOperand(3).getImm() == 32 &&
|
||||
((SecondOpcode == AArch64::MOVKXi &&
|
||||
Second->getOperand(3).getImm() == 48) ||
|
||||
SecondOpcode == AArch64::INSTRUCTION_LIST_END);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@ protected:
|
|||
bool HasArithmeticBccFusion = false;
|
||||
bool HasArithmeticCbzFusion = false;
|
||||
bool HasFuseAES = false;
|
||||
bool HasFuseLiterals = false;
|
||||
bool DisableLatencySchedHeuristic = false;
|
||||
bool UseRSqrt = false;
|
||||
uint8_t MaxInterleaveFactor = 2;
|
||||
|
@ -199,6 +200,7 @@ public:
|
|||
/// Returns the HasArithmeticBccFusion subtarget flag.
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
|
||||
/// Returns the HasArithmeticCbzFusion subtarget flag.
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
|
||||
/// Returns the HasFuseAES subtarget flag (the "fuse-aes" feature).
bool hasFuseAES() const { return HasFuseAES; }
|
||||
/// Returns the HasFuseLiterals subtarget flag (the "fuse-literals" feature),
/// i.e. whether the CPU fuses literal generation operations.
bool hasFuseLiterals() const { return HasFuseLiterals; }
|
||||
/// Returns the UseRSqrt subtarget flag.
bool useRSqrt() const { return UseRSqrt; }
|
||||
/// Returns the MaxInterleaveFactor subtarget setting as an unsigned value.
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
|
||||
unsigned getVectorInsertExtractBaseCost() const {
|
||||
|
|
|
@ -330,6 +330,20 @@ public:
|
|||
return DAG;
|
||||
}
|
||||
|
||||
/// Creates the post-register-allocation machine scheduler for the function
/// referenced by the scheduling context \p C.
///
/// When the subtarget reports hasFuseLiterals(), this returns the generic
/// post-RA scheduler with the AArch64 macro fusion DAG mutation attached;
/// otherwise it returns nullptr.
/// NOTE(review): a nullptr result presumably makes the caller fall back to its
/// default post-RA scheduler -- confirm against the pass configuration code.
ScheduleDAGInstrs *
|
||||
createPostMachineScheduler(MachineSchedContext *C) const override {
|
||||
const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
|
||||
if (ST.hasFuseLiterals()) {
|
||||
// Run macro fusion again after register allocation, because literals are
|
||||
// only expanded from pseudo instructions at that point (see addPreSched2()).
|
||||
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
|
||||
DAG->addMutation(createAArch64MacroFusionDAGMutation());
|
||||
return DAG;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void addIRPasses() override;
|
||||
bool addPreISel() override;
|
||||
bool addInstSelector() override;
|
||||
|
|
Loading…
Reference in New Issue