From b551aa4da514f71dc8c0e1e07a44b754d3a68d94 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 29 Mar 2010 23:24:21 +0000 Subject: [PATCH] Basic implementation of SSEDomainFix pass. Cross-block inference is primitive and wrong, but the pass is working otherwise. llvm-svn: 99848 --- llvm/lib/Target/X86/SSEDomainFix.cpp | 363 ++++++++++++++++++++++++++- llvm/lib/Target/X86/X86InstrInfo.cpp | 80 +++--- llvm/lib/Target/X86/X86InstrInfo.h | 11 +- 3 files changed, 398 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/X86/SSEDomainFix.cpp b/llvm/lib/Target/X86/SSEDomainFix.cpp index 44205955d92c..3e456c236acf 100644 --- a/llvm/lib/Target/X86/SSEDomainFix.cpp +++ b/llvm/lib/Target/X86/SSEDomainFix.cpp @@ -29,12 +29,108 @@ using namespace llvm; namespace { + +/// Allocate objects from a pool, allow objects to be recycled, and provide a +/// way of deleting everything. +template +class PoolAllocator { + std::vector Pages, Avail; +public: + ~PoolAllocator() { Clear(); } + + T* Alloc() { + if (Avail.empty()) { + T *p = new T[PageSize]; + Pages.push_back(p); + Avail.reserve(PageSize); + for (unsigned n = 0; n != PageSize; ++n) + Avail.push_back(p+n); + } + T *p = Avail.back(); + Avail.pop_back(); + return p; + } + + // Allow object to be reallocated. It won't be reconstructed. + void Recycle(T *p) { + p->clear(); + Avail.push_back(p); + } + + // Destroy all objects, make sure there are no external pointers to them. + void Clear() { + Avail.clear(); + while (!Pages.empty()) { + delete[] Pages.back(); + Pages.pop_back(); + } + } +}; + +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. 
+/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one or more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact that the register is now available in multiple +/// domains. +struct DomainValue { + // Basic reference counting. + unsigned Refs; + + // Available domains. For an open DomainValue, it is the still possible + // domains for collapsing. For a collapsed DomainValue it is the domains where + // the register is available for free. + unsigned Mask; + + // Position of the last defining instruction. + unsigned Dist; + + // Twiddleable instructions using or defining these registers. + SmallVector Instrs; + + // Collapsed DomainValue have no instructions to twiddle - it simply keeps + // track of the domains where the registers are already available. + bool collapsed() const { return Instrs.empty(); } + + // Is any domain in mask available? + bool compat(unsigned mask) const { + return Mask & mask; + } + + // Mark domain as available + void add(unsigned domain) { + Mask |= 1u << domain; + } + + DomainValue() { clear(); } + + void clear() { + Refs = Mask = Dist = 0; + Instrs.clear(); + } +}; + class SSEDomainFixPass : public MachineFunctionPass { static char ID; - const X86InstrInfo *TII; + PoolAllocator Pool; MachineFunction *MF; + const X86InstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineBasicBlock *MBB; + bool hasLiveRegs; + DomainValue *LiveRegs[16]; + public: SSEDomainFixPass() : MachineFunctionPass(&ID) {} @@ -50,47 +146,288 @@ public: } private: + // Register mapping. + int RegIndex(unsigned Reg); + + // LiveRegs manipulations. 
+ void SetLiveReg(int rx, DomainValue *DV); + void Kill(int rx); + void Force(int rx, unsigned domain); + void Collapse(DomainValue *dv, unsigned domain); + bool Merge(DomainValue *A, DomainValue *B); + void enterBasicBlock(MachineBasicBlock *MBB); + void visitGenericInstr(MachineInstr*); + void visitSoftInstr(MachineInstr*, unsigned mask); + void visitHardInstr(MachineInstr*, unsigned domain); + }; } char SSEDomainFixPass::ID = 0; +/// Translate TRI register number to an index into our smaller tables of +/// interesting registers. Return -1 for boring registers. +int SSEDomainFixPass::RegIndex(unsigned reg) { + // Registers are sorted lexicographically. + // We just need them to be consecutive, ordering doesn't matter. + assert(X86::XMM9 == X86::XMM0+15 && "Unexpected sort"); + reg -= X86::XMM0; + return reg < 16 ? reg : -1; +} + +/// Set LiveRegs[rx] = dv, updating reference counts. +void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { + if (LiveRegs[rx] == dv) + return; + if (LiveRegs[rx]) { + assert(LiveRegs[rx]->Refs && "Bad refcount"); + if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]); + } + LiveRegs[rx] = dv; + if (dv) ++dv->Refs; +} + +// Kill register rx, recycle or collapse any DomainValue. +void SSEDomainFixPass::Kill(int rx) { + if (!LiveRegs[rx]) return; + + // Before killing the last reference to an open DomainValue, collapse it to + // the first available domain. + if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed()) + Collapse(LiveRegs[rx], CountTrailingZeros_32(LiveRegs[rx]->Mask)); + else + SetLiveReg(rx, 0); +} + +/// Force register rx into domain. +void SSEDomainFixPass::Force(int rx, unsigned domain) { + hasLiveRegs = true; + if (DomainValue *dv = LiveRegs[rx]) { + if (dv->collapsed()) + dv->add(domain); + else + Collapse(dv, domain); + } else { + // Set up basic collapsed DomainValue + DomainValue *dv = Pool.Alloc(); + dv->add(domain); + SetLiveReg(rx, dv); + } +} + +/// Collapse open DomainValue into given domain. 
If there are multiple +/// registers using dv, they each get a unique collapsed DomainValue. +void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { + assert(dv->compat(1u << domain) && "Cannot collapse"); + + // Collapse all the instructions. + while (!dv->Instrs.empty()) { + MachineInstr *mi = dv->Instrs.back(); + TII->SetSSEDomain(mi, domain); + dv->Instrs.pop_back(); + } + dv->Mask = 1u << domain; + + // If there are multiple users, give them new, unique DomainValues. + if (dv->Refs > 1) { + for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx) + if (LiveRegs[rx] == dv) { + DomainValue *dv2 = Pool.Alloc(); + dv2->add(domain); + SetLiveReg(rx, dv2); + } + Pool.Recycle(dv); + } +} + +/// Merge - All instructions and registers in B are moved to A, and B is +/// released. +bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { + assert(!A->collapsed() && "Cannot merge into collapsed"); + assert(!B->collapsed() && "Cannot merge from collapsed"); + if (!A->compat(B->Mask)) + return false; + A->Mask &= B->Mask; + A->Dist = std::max(A->Dist, B->Dist); + A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx) + if (LiveRegs[rx] == B) + SetLiveReg(rx, A); + return true; +} + void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) { MBB = mbb; - DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n"); +} + +// A hard instruction only works in one domain. All input registers will be +// forced into that domain. +void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { + // Collapse all uses. + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Force(rx, domain); + } + + // Kill all defs and force them. 
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Kill(rx); + Force(rx, domain); + } +} + +// A soft instruction can be changed to work in other domains given by mask. +void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { + // Scan the explicit use operands for incoming domains. + unsigned collmask = mask; + SmallVector used; + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + if (DomainValue *dv = LiveRegs[rx]) { + // Is it possible to use this collapsed register for free? + if (dv->collapsed()) { + if (unsigned m = collmask & dv->Mask) + collmask = m; + } else if (dv->compat(collmask)) + used.push_back(rx); + else + Kill(rx); + } + } + + // If the collapsed operands force a single domain, propagate the collapse. + if (isPowerOf2_32(collmask)) { + unsigned domain = CountTrailingZeros_32(collmask); + TII->SetSSEDomain(mi, domain); + visitHardInstr(mi, domain); + return; + } + + // Kill off any remaining uses that don't match collmask, and build a list of + // incoming DomainValue that we want to merge. + SmallVector doms; + for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + int rx = *i; + DomainValue *dv = LiveRegs[rx]; + // This useless DomainValue could have been missed above + if (!dv->compat(collmask)) { + Kill(*i); + continue; + } + // sorted, uniqued insert. + bool inserted = false; + for (SmallVector::iterator i = doms.begin(), e = doms.end(); + i != e && !inserted; ++i) { + if (dv == *i) + inserted = true; + else if (dv->Dist < (*i)->Dist) { + inserted = true; + doms.insert(i, dv); + } + } + if (!inserted) + doms.push_back(dv); + } + + // doms are now sorted in order of appearance. 
Try to merge them all, giving + // priority to the latest ones. + DomainValue *dv = 0; + while (!doms.empty()) { + if (!dv) + dv = doms.back(); + else if (!Merge(dv, doms.back())) + for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) + if (LiveRegs[*i] == doms.back()) + Kill(*i); + doms.pop_back(); + } + + // dv is the DomainValue we are going to use for this instruction. + if (!dv) + dv = Pool.Alloc(); + dv->Mask = collmask; + dv->Instrs.push_back(mi); + + // Finally set all defs and non-collapsed uses to dv. + for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { + Kill(rx); + SetLiveReg(rx, dv); + } + } +} + +void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { + // Process explicit defs, kill any XMM registers redefined + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Kill(rx); + } } bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = static_cast(MF->getTarget().getInstrInfo()); + TRI = MF->getTarget().getRegisterInfo(); + MBB = 0; + + hasLiveRegs = false; + for (unsigned i=0, e = array_lengthof(LiveRegs); i != e; ++i) + LiveRegs[i] = 0; // If no XMM registers are used in the function, we can skip it completely. 
- bool XMMIsUsed = false; + bool anyregs = false; for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), E = X86::VR128RegClass.end(); I != E; ++I) if (MF->getRegInfo().isPhysRegUsed(*I)) { - XMMIsUsed = true; + anyregs = true; break; } - if (!XMMIsUsed) return false; + if (!anyregs) return false; MachineBasicBlock *Entry = MF->begin(); SmallPtrSet Visited; - for (df_ext_iterator > + for (df_ext_iterator > DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); - DFI != DFE; ++DFI) { + DFI != DFE; ++DFI) { enterBasicBlock(*DFI); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - MachineInstr *MI = I; - const unsigned *equiv = 0; - X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv); - (void) domain; - DEBUG(dbgs() << "-isd"[domain] << (equiv ? "* " : " ") << *MI); + MachineInstr *mi = I; + if (mi->isDebugValue()) continue; + std::pair domp = TII->GetSSEDomain(mi); + if (domp.first) + if (domp.second) + visitSoftInstr(mi, domp.second); + else + visitHardInstr(mi, domp.first); + else if (hasLiveRegs) + visitGenericInstr(mi); } } + + Pool.Clear(); + return false; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index eeb020ba3b73..5def78737f88 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3659,45 +3659,49 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { return GlobalBaseReg; } -X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI, - const unsigned *&equiv) const { - // These are the replaceable SSE instructions. Some of these have Int variants - // that we don't include here. We don't want to replace instructions selected - // by intrinsics. 
- static const unsigned ReplaceableInstrs[][3] = { - //PackedInt PackedSingle PackedDouble - { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr }, - { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm }, - { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr }, - { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr }, - { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm }, - { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr }, - { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm }, - { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr }, - { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm }, - { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr }, - { X86::PORrm, X86::ORPSrm, X86::ORPDrm }, - { X86::PORrr, X86::ORPSrr, X86::ORPDrr }, - { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm }, - { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr }, - { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm }, - { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr }, - { X86::PXORrm, X86::XORPSrm, X86::XORPDrm }, - { X86::PXORrr, X86::XORPSrr, X86::XORPDrr }, - }; +// These are the replaceable SSE instructions. Some of these have Int variants +// that we don't include here. We don't want to replace instructions selected +// by intrinsics. 
+static const unsigned ReplaceableInstrs[][3] = { + //PackedInt PackedSingle PackedDouble + { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr }, + { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm }, + { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr }, + { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr }, + { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm }, + { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr }, + { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm }, + { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr }, + { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm }, + { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr }, + { X86::PORrm, X86::ORPSrm, X86::ORPDrm }, + { X86::PORrr, X86::ORPSrr, X86::ORPDrr }, + { X86::PXORrm, X86::XORPSrm, X86::XORPDrm }, + { X86::PXORrr, X86::XORPSrr, X86::XORPDrr }, +}; - const SSEDomain domain = - SSEDomain((MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3); - if (domain == NotSSEDomain) - return domain; +// FIXME: Some shuffle and unpack instructions have equivalents in different +// domains, but they require a bit more work than just switching opcodes. - // Linear search FTW! - const unsigned opc = MI->getOpcode(); +static const unsigned *lookup(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) - if (ReplaceableInstrs[i][domain-1] == opc) { - equiv = ReplaceableInstrs[i]; - return domain; - } - equiv = 0; - return domain; + if (ReplaceableInstrs[i][domain-1] == opcode) + return ReplaceableInstrs[i]; + return 0; +} + +std::pair +X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const { + uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; + return std::make_pair(domain, domain != NotSSEDomain && + lookup(MI->getOpcode(), domain) ? 
0xe : 0); +} + +void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { + assert(Domain>0 && Domain<4 && "Invalid execution domain"); + uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; + assert(dom && "Not an SSE instruction"); + const unsigned *table = lookup(MI->getOpcode(), dom); + assert(table && "Cannot change domain"); + MI->setDesc(get(table[Domain-1])); } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 965740dcaf06..2486d1be66b2 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -722,11 +722,12 @@ public: /// Some SSE instructions come in variants for three domains. enum SSEDomain { NotSSEDomain, PackedInt, PackedSingle, PackedDouble }; - /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for - /// unknown instructions. If the instruction has equivalents for other - /// domains, equiv points to a list of opcodes for [PackedInt, PackedSingle, - /// PackedDouble]. - SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const; + /// GetSSEDomain - Return the SSE execution domain of MI as the first element, + /// and a bitmask of possible arguments to SetSSEDomain as the second. + std::pair GetSSEDomain(const MachineInstr *MI) const; + + /// SetSSEDomain - Set the SSEDomain of MI. + void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,