[Hexagon] Check for potential bank conflicts in post-RA scheduling

Insert artificial edges between loads that could cause a cache bank
conflict.

llvm-svn: 311901
This commit is contained in:
Krzysztof Parzyszek 2017-08-28 18:36:21 +00:00
parent 312c557b3b
commit 2164a271a3
4 changed files with 83 additions and 0 deletions

View File

@ -93,6 +93,10 @@ static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
// Hidden command-line switch (on by default) that gates
// BankConflictMutation::apply: when it is false, that mutation returns
// immediately without adding any edges to the scheduling DAG.
static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict",
cl::Hidden, cl::ZeroOrMore, cl::init(true),
cl::desc("Enable checking for cache bank conflicts"));
void HexagonSubtarget::initializeEnvironment() {
UseMemOps = false;
@ -247,6 +251,52 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) {
}
}
/// DAG mutation that discourages scheduling two loads likely to cause a
/// cache bank conflict next to each other.
///
/// For every pair of nearby loads that use base+immediate addressing off the
/// same base register, have access sizes below 32, and whose offsets agree in
/// bits 3 and 4, an artificial dependency edge with latency 1 is added from
/// the earlier load to the later one. Does nothing when
/// EnableCheckBankConflict is false.
///
/// \param DAG scheduling DAG whose SUnits are inspected and augmented.
void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
  if (!EnableCheckBankConflict)
    return;

  const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);

  // Only pure loads (not load+store instructions) with base+immediate
  // addressing are considered.
  auto IsCandidateLoad = [&HII](MachineInstr &MI) -> bool {
    return MI.mayLoad() && !MI.mayStore() &&
           HII.getAddrMode(MI) == HexagonII::BaseImmOffset;
  };

  // Such loads normally have no dependency between them, so existing DAG
  // edges cannot be relied upon; every candidate pair is examined explicitly.
  const unsigned NumUnits = DAG->SUnits.size();
  for (unsigned First = 0; First != NumUnits; ++First) {
    SUnit &EarlierSU = DAG->SUnits[First];
    MachineInstr &EarlierMI = *EarlierSU.getInstr();
    if (!IsCandidateLoad(EarlierMI))
      continue;

    int EarlierOffset;
    unsigned EarlierSize;
    unsigned EarlierBase =
        HII.getBaseAndOffset(EarlierMI, EarlierOffset, EarlierSize);
    // If there is no base register, or the access size is as long as the L1
    // cache line or longer, skip the check.
    if (EarlierBase == 0 || EarlierSize >= 32)
      continue;

    // Bound the look-ahead to units with index below First+32 so the scan
    // does not become quadratic in the number of SUnits.
    const unsigned WindowEnd = std::min(First + 32, NumUnits);
    for (unsigned Second = First + 1; Second != WindowEnd; ++Second) {
      SUnit &LaterSU = DAG->SUnits[Second];
      MachineInstr &LaterMI = *LaterSU.getInstr();
      if (!IsCandidateLoad(LaterMI))
        continue;

      int LaterOffset;
      unsigned LaterSize;
      unsigned LaterBase =
          HII.getBaseAndOffset(LaterMI, LaterOffset, LaterSize);
      if (LaterBase == 0 || LaterSize >= 32 || EarlierBase != LaterBase)
        continue;

      // A bank conflict is unlikely when bits 3 and 4 of the two offsets
      // differ; only pairs that agree in both bits get an edge.
      const bool SameBankBits = ((EarlierOffset ^ LaterOffset) & 0x18) == 0;
      if (!SameBankBits)
        continue;

      // Add an artificial edge from the earlier load and give it one cycle
      // of extra latency.
      SDep A(&EarlierSU, SDep::Artificial);
      A.setLatency(1);
      LaterSU.addPred(A, true);
    }
  }
}
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
@ -330,6 +380,7 @@ void HexagonSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
Mutations.push_back(llvm::make_unique<BankConflictMutation>());
}
void HexagonSubtarget::getSMSMutations(

View File

@ -68,6 +68,9 @@ public:
bool shouldTFRICallBind(const HexagonInstrInfo &HII,
const SUnit &Inst1, const SUnit &Inst2) const;
};
// Scheduling DAG mutation whose apply() inserts artificial edges between
// loads that could cause a cache bank conflict.
struct BankConflictMutation : public ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAG) override;
};
private:
std::string CPUString;

View File

@ -105,6 +105,7 @@ HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
addMutation(make_unique<HexagonSubtarget::UsrOverflowMutation>());
addMutation(make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
addMutation(make_unique<HexagonSubtarget::BankConflictMutation>());
}
// Check if FirstI modifies a register that SecondI reads.

View File

@ -0,0 +1,28 @@
# RUN: llc -march=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s
# The two loads from %a (%r0) can cause a bank conflict. Check that they
# are not scheduled next to each other.
# CHECK: L2_loadri_io %r0, 8
# CHECK: L2_loadri_io killed %r1, 0
# CHECK: L2_loadri_io killed %r0, 12
--- |
define void @foo(i32* %a, i32* %b) {
ret void
}
...
---
name: foo
tracksRegLiveness: true
body: |
bb.0:
liveins: %r0, %r1
%r2 = L2_loadri_io %r0, 8 :: (load 4 from %ir.a)
%r3 = L2_loadri_io killed %r0, 12 :: (load 4 from %ir.a)
%r4 = L2_loadri_io killed %r1, 0 :: (load 4 from %ir.b)
...