[Hexagon] Expand vgather pseudos during packetization

This will allow packetizing the vgather expansion with other instructions.

llvm-svn: 340028
This commit is contained in:
Krzysztof Parzyszek 2018-08-17 14:24:24 +00:00
parent 3291f9aa81
commit 39a979c838
9 changed files with 157 additions and 212 deletions

View File

@ -29,7 +29,6 @@ add_llvm_target(HexagonCodeGen
HexagonExpandCondsets.cpp
HexagonFixupHwLoops.cpp
HexagonFrameLowering.cpp
HexagonGatherPacketize.cpp
HexagonGenExtract.cpp
HexagonGenInsert.cpp
HexagonGenMux.cpp

View File

@ -1,104 +0,0 @@
//===- HexagonGatherPacketize.cpp -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This pass ensures that producer and consumer of VTMP are paired in a bundle.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "gather-packetize"
#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
// Hidden command-line switch, on by default; runOnMachineFunction returns
// early without changing anything when it is set to false.
cl::opt<bool> EnableGatherPacketize(
"hexagon-enable-gather-packetize", cl::Hidden, cl::init(true),
cl::desc("Generate gather packets before packetization"));
// Forward declarations of the pass factory and the pass-registration hook,
// both of which are used/defined further down in this file.
namespace llvm {
FunctionPass *createHexagonGatherPacketize();
void initializeHexagonGatherPacketizePass(PassRegistry &);
}
namespace {
class HexagonGatherPacketize : public MachineFunctionPass {
public:
static char ID;
HexagonGatherPacketize() : MachineFunctionPass(ID) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeHexagonGatherPacketizePass(Registry);
}
StringRef getPassName() const override {
return "Hexagon Gather Packetize Code";
}
bool runOnMachineFunction(MachineFunction &Fn) override;
};
char HexagonGatherPacketize::ID = 0;
/// Returns true when \p MI carries an implicit register definition of
/// Hexagon::VTMP among its operands.
static inline bool isVtmpDef(const MachineInstr &MI) {
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg())
      continue;
    if (Op.isDef() && Op.isImplicit() && Op.getReg() == Hexagon::VTMP)
      return true;
  }
  return false;
}
static inline bool isVtmpUse(const MachineInstr &MI) {
return (MI.mayStore() && (MI.getOperand(2)).isReg() &&
((MI.getOperand(2)).getReg() == Hexagon::VTMP));
}
// Pairs every instruction that implicitly defines VTMP with the next
// instruction in the same block that stores VTMP, and bundles the two.
//
// Returns false (nothing done) when -hexagon-enable-gather-packetize is off
// or when the subtarget lacks V65 ops or HVX ops. Otherwise every block is
// scanned and true is returned, whether or not a pair was found.
bool HexagonGatherPacketize::runOnMachineFunction(MachineFunction &Fn) {
if (!EnableGatherPacketize)
return false;
auto &ST = Fn.getSubtarget<HexagonSubtarget>();
bool HasV65 = ST.hasV65Ops();
bool UseHVX = ST.useHVXOps();
// Both features are required; the bitwise & on two bools acts as a logical
// AND here.
if (!(HasV65 & UseHVX))
return false;
for (auto &MBB : Fn) {
// VtmpDef is true while a VTMP producer (remembered in DefMII) is still
// waiting for its consumer.
bool VtmpDef = false;
MachineBasicBlock::iterator MII, MIE, DefMII;
for (MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ++MII) {
MachineInstr &MI = *MII;
if (VtmpDef) {
// Skip everything between the producer and its consumer.
if (!isVtmpUse(MI))
continue;
// Move the consumer so that it directly follows the producer, then bundle
// the range from the producer up to (not including) the instruction after
// the consumer.
MBB.splice(std::next(DefMII), &MBB, MII);
finalizeBundle(MBB, DefMII.getInstrIterator(),
std::next(MII).getInstrIterator());
VtmpDef = false;
continue;
}
// No producer pending: look for the next one.
if (!(isVtmpDef(MI)))
continue;
VtmpDef = true;
DefMII = MII;
}
// A producer left unpaired at the end of the block is treated as an error.
assert(!VtmpDef && "VTMP producer and consumer not in same block");
}
return true;
}
}
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
// Registers the pass under the name "hexagon-gather-packetize".
INITIALIZE_PASS(HexagonGatherPacketize, "hexagon-gather-packetize",
"Hexagon gather packetize Code", false, false)
// Factory: returns a newly allocated HexagonGatherPacketize pass.
FunctionPass *llvm::createHexagonGatherPacketize() {
return new HexagonGatherPacketize();
}

View File

@ -1341,81 +1341,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(Hexagon::J2_jumprfnew));
return true;
case Hexagon::V6_vgathermh_pseudo:
BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
.addImm(0)
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return true;
case Hexagon::V6_vgathermw_pseudo:
BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
.addImm(0)
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return true;
case Hexagon::V6_vgathermhw_pseudo:
BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
.addImm(0)
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return true;
case Hexagon::V6_vgathermhq_pseudo:
BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
.addImm(0)
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return true;
case Hexagon::V6_vgathermwq_pseudo:
BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
.addImm(0)
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return true;
case Hexagon::V6_vgathermhwq_pseudo:
BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
.addImm(0)
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return true;
case Hexagon::PS_loadrub_pci:
return RealCirc(Hexagon::L2_loadrub_pci, /*HasImm*/true, /*MxOp*/4);
case Hexagon::PS_loadrb_pci:
@ -1465,6 +1390,93 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
/// Expands a V6_vgather*_pseudo instruction into two real instructions that
/// are inserted in front of it:
///   1. the matching V6_vgather* instruction, which receives the pseudo's
///      source operands (operands 1..3, or 1..4 for the "q" variants), and
///   2. a V6_vS32b_new_ai store built from the pseudo's operand 0, an
///      immediate 0, and the register Hexagon::VTMP.
/// The pseudo is then erased, so \p MI must not be used after a successful
/// expansion.
///
/// \returns an instr_iterator to the first inserted instruction (the gather).
/// If \p MI is not one of the vgather pseudos, nothing is changed and the
/// iterator of \p MI itself is returned.
MachineBasicBlock::instr_iterator
HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
  // Select the real gather opcode and how many source operands (starting at
  // operand 1) are forwarded to it.
  unsigned GatherOpc = 0;
  unsigned NumSrcOps = 0;
  switch (MI.getOpcode()) {
  case Hexagon::V6_vgathermh_pseudo:
    GatherOpc = Hexagon::V6_vgathermh;
    NumSrcOps = 3;
    break;
  case Hexagon::V6_vgathermw_pseudo:
    GatherOpc = Hexagon::V6_vgathermw;
    NumSrcOps = 3;
    break;
  case Hexagon::V6_vgathermhw_pseudo:
    GatherOpc = Hexagon::V6_vgathermhw;
    NumSrcOps = 3;
    break;
  case Hexagon::V6_vgathermhq_pseudo:
    GatherOpc = Hexagon::V6_vgathermhq;
    NumSrcOps = 4;
    break;
  case Hexagon::V6_vgathermwq_pseudo:
    GatherOpc = Hexagon::V6_vgathermwq;
    NumSrcOps = 4;
    break;
  case Hexagon::V6_vgathermhwq_pseudo:
    GatherOpc = Hexagon::V6_vgathermhwq;
    NumSrcOps = 4;
    break;
  default:
    // Not a vgather pseudo: leave the instruction untouched.
    return MI.getIterator();
  }

  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Build the gather and forward the pseudo's source operands in order.
  auto Gather = BuildMI(MBB, MI, DL, get(GatherOpc));
  for (unsigned Idx = 1; Idx <= NumSrcOps; ++Idx)
    Gather.add(MI.getOperand(Idx));
  MachineBasicBlock::iterator First = Gather;

  // Build the store of VTMP using the pseudo's operand 0 and offset 0.
  BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
      .add(MI.getOperand(0))
      .addImm(0)
      .addReg(Hexagon::VTMP);

  // Every operand has been copied by now; drop the pseudo.
  MBB.erase(MI);
  return First.getInstrIterator();
}
// We indicate that we want to reverse the branch by
// inserting the reversed branching opcode.
bool HexagonInstrInfo::reverseBranchCondition(

View File

@ -472,6 +472,8 @@ public:
uint64_t getType(const MachineInstr &MI) const;
unsigned getUnits(const MachineInstr &MI) const;
MachineBasicBlock::instr_iterator expandVGatherPseudo(MachineInstr &MI) const;
/// getInstrTimingClassLatency - Compute the instruction latency of a given
/// instruction using Timing Class information, if available.
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;

View File

@ -153,7 +153,6 @@ namespace llvm {
FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonEarlyIfConversion();
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonGatherPacketize();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMux();
@ -165,7 +164,7 @@ namespace llvm {
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonOptAddrMode();
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonPacketizer(bool Minimal);
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonRDFOpt();
FunctionPass *createHexagonSplitConst32AndConst64();
@ -409,7 +408,6 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonBranchRelaxation());
// Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
addPass(createHexagonFixupHwLoops());
@ -418,12 +416,8 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonGenMux());
}
// Create packets for 2 instructions that constitute a gather instruction.
// Do this regardless of the opt level.
addPass(createHexagonGatherPacketize(), false);
if (!NoOpt)
addPass(createHexagonPacketizer(), false);
// Packetization is mandatory: it handles gather/scatter at all opt levels.
addPass(createHexagonPacketizer(NoOpt), false);
if (EnableVectorPrint)
addPass(createHexagonVectorPrint(), false);

View File

@ -77,7 +77,7 @@ extern cl::opt<bool> ScheduleInlineAsm;
namespace llvm {
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonPacketizer(bool Minimal);
void initializeHexagonPacketizerPass(PassRegistry&);
} // end namespace llvm
@ -88,7 +88,8 @@ namespace {
public:
static char ID;
HexagonPacketizer() : MachineFunctionPass(ID) {}
HexagonPacketizer(bool Min = false)
: MachineFunctionPass(ID), Minimal(Min) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@ -112,6 +113,7 @@ namespace {
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
const bool Minimal;
};
} // end anonymous namespace
@ -129,8 +131,9 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
MachineLoopInfo &MLI, AliasAnalysis *AA,
const MachineBranchProbabilityInfo *MBPI)
: VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
const MachineBranchProbabilityInfo *MBPI, bool Minimal)
: VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
Minimal(Minimal) {
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
@ -200,9 +203,6 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
auto &HST = MF.getSubtarget<HexagonSubtarget>();
if (DisablePacketizer || !HST.usePackets() || skipFunction(MF.getFunction()))
return false;
HII = HST.getInstrInfo();
HRI = HST.getRegisterInfo();
auto &MLI = getAnalysis<MachineLoopInfo>();
@ -213,7 +213,9 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
HII->genAllInsnTimingClasses(MF);
// Instantiate the packetizer.
HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() ||
skipFunction(MF.getFunction());
HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@ -226,7 +228,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
// Here, Insn 1 will result in the dependence graph not emitting an output
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
for (auto &MB : MF) {
for (MachineBasicBlock &MB : MF) {
auto End = MB.end();
auto MI = MB.begin();
while (MI != End) {
@ -1759,8 +1761,8 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
}
void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI) {
// Replace VLIWPacketizerList::endPacket(MBB, MI).
MachineBasicBlock::iterator EndMI) {
// Replace VLIWPacketizerList::endPacket(MBB, EndMI).
bool memShufDisabled = getmemShufDisabled();
if (memShufDisabled && !foundLSInPacket()) {
@ -1769,25 +1771,32 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
}
memShufDisabled = getmemShufDisabled();
if (CurrentPacketMIs.size() > 1) {
MachineBasicBlock::instr_iterator FirstMI(CurrentPacketMIs.front());
MachineBasicBlock::instr_iterator LastMI(MI.getInstrIterator());
finalizeBundle(*MBB, FirstMI, LastMI);
OldPacketMIs.clear();
for (MachineInstr *MI : CurrentPacketMIs) {
MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator());
for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI))
OldPacketMIs.push_back(&I);
}
CurrentPacketMIs.clear();
if (OldPacketMIs.size() > 1) {
MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
finalizeBundle(*MBB, FirstMI, LastMI);
auto BundleMII = std::prev(FirstMI);
if (memShufDisabled)
HII->setBundleNoShuf(BundleMII);
setmemShufDisabled(false);
}
OldPacketMIs = CurrentPacketMIs;
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
LLVM_DEBUG(dbgs() << "End packet\n");
}
/// Returns false when the packetizer is in Minimal mode; otherwise returns
/// true exactly when producesStall(MI) is false.
bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
  // Short-circuit keeps producesStall from being called in Minimal mode.
  return !Minimal && !producesStall(MI);
}
@ -1860,6 +1869,6 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
FunctionPass *llvm::createHexagonPacketizer() {
return new HexagonPacketizer();
FunctionPass *llvm::createHexagonPacketizer(bool Minimal) {
return new HexagonPacketizer(Minimal);
}

View File

@ -66,11 +66,13 @@ protected:
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
const bool Minimal;
public:
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
AliasAnalysis *AA,
const MachineBranchProbabilityInfo *MBPI);
const MachineBranchProbabilityInfo *MBPI,
bool Minimal);
// initPacketizerState - initialize some internal flags.
void initPacketizerState() override;

View File

@ -12,11 +12,11 @@ target triple = "hexagon"
; CHECK-NEXT: m0 = r2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: vtmp.w = vgather(r1,m0,v0.w).w
; CHECK-NEXT: vmem(r0+#0) = vtmp.new
; CHECK-NEXT: r29 = and(r29,#-64)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r29 = and(r29,#-64)
; CHECK-NEXT: vtmp.w = vgather(r1,m0,v0.w).w
; CHECK-NEXT: vmem(r0+#0) = vtmp.new
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = add(r29,#0)

View File

@ -0,0 +1,31 @@
# RUN: llc -march=hexagon -mcpu=hexagonv65 -start-before hexagon-packetizer -o - %s | FileCheck %s
# Check that the vgather pseudo was expanded and packetized with the
# surrounding instructions.
# CHECK: r1 = add(r1,#1)
# CHECK-NEXT: vtmp.h = vgather
# CHECK-NEXT: vmem(r0+#0) = vtmp.new
# CHECK-NEXT: } :endloop0
name: fred
tracksRegLiveness: true
body: |
bb.0:
liveins: $r0, $w0
$r1 = A2_tfrsi 2
$r2 = A2_tfrsi 1
$m0 = A2_tfrrcr killed $r1
J2_loop0i %bb.1, 128, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
bb.1:
liveins: $r0, $r1, $r2, $m0, $w0
$r1 = A2_addi $r1, 1
V6_vgathermhw_pseudo killed $r0, killed $r2, killed $m0, killed $w0, implicit-def $vtmp
ENDLOOP0 %bb.1, implicit $lc0, implicit $sa0, implicit-def $lc0, implicit-def $p3, implicit-def $pc, implicit-def $usr
bb.2:
liveins: $r1
$r0 = A2_tfr $r1
PS_jmpret killed $r31, implicit $r0, implicit-def dead $pc
...