[llvm-mca] Make the LSUnit a HardwareUnit, and allow derived classes to implement a different memory consistency model.

The LSUnit is now a HardwareUnit, and it is owned by the mca::Context.
Derived classes can now implement a different consistency model by overriding
method `LSUnit::isReady()`.

This patch also slightly refactors the Scheduler interface in an attempt to
simplify the interaction between ExecuteStage and the underlying Scheduler.

llvm-svn: 340176
This commit is contained in:
Andrea Di Biagio 2018-08-20 14:41:36 +00:00
parent 870aaf2963
commit 0875e759f0
6 changed files with 155 additions and 145 deletions

View File

@ -36,8 +36,9 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
// Create the hardware units defining the backend.
auto RCU = llvm::make_unique<RetireControlUnit>(SM);
auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
auto HWS = llvm::make_unique<Scheduler>(
SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias);
auto LSU = llvm::make_unique<LSUnit>(Opts.LoadQueueSize, Opts.StoreQueueSize,
Opts.AssumeNoAlias);
auto HWS = llvm::make_unique<Scheduler>(SM, LSU.get());
// Create the pipeline and its stages.
auto StagePipeline = llvm::make_unique<Pipeline>();
@ -47,9 +48,10 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
auto Execute = llvm::make_unique<ExecuteStage>(*HWS);
auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF);
// Add the hardware to the context.
// Pass the ownership of all the hardware units to this Context.
addHardwareUnit(std::move(RCU));
addHardwareUnit(std::move(PRF));
addHardwareUnit(std::move(LSU));
addHardwareUnit(std::move(HWS));
// Build the pipeline.

View File

@ -25,17 +25,17 @@ namespace mca {
using namespace llvm;
HWStallEvent::GenericEventType toHWStallEventType(Scheduler::StallKind Event) {
switch (Event) {
case Scheduler::LoadQueueFull:
HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) {
switch (Status) {
case Scheduler::SC_LOAD_QUEUE_FULL:
return HWStallEvent::LoadQueueFull;
case Scheduler::StoreQueueFull:
case Scheduler::SC_STORE_QUEUE_FULL:
return HWStallEvent::StoreQueueFull;
case Scheduler::SchedulerQueueFull:
case Scheduler::SC_BUFFERS_FULL:
return HWStallEvent::SchedulerQueueFull;
case Scheduler::DispatchGroupStall:
case Scheduler::SC_DISPATCH_GROUP_STALL:
return HWStallEvent::DispatchGroupStall;
case Scheduler::NoStall:
case Scheduler::SC_AVAILABLE:
return HWStallEvent::Invalid;
}
@ -43,15 +43,15 @@ HWStallEvent::GenericEventType toHWStallEventType(Scheduler::StallKind Event) {
}
bool ExecuteStage::isAvailable(const InstRef &IR) const {
Scheduler::StallKind Event = Scheduler::NoStall;
if (HWS.canBeDispatched(IR, Event))
return true;
HWStallEvent::GenericEventType ET = toHWStallEventType(Event);
notifyEvent<HWStallEvent>(HWStallEvent(ET, IR));
return false;
if (Scheduler::Status S = HWS.isAvailable(IR)) {
HWStallEvent::GenericEventType ET = toHWStallEventType(S);
notifyEvent<HWStallEvent>(HWStallEvent(ET, IR));
return false;
}
return true;
}
// Reclaim the simulated resources used by the scheduler.
void ExecuteStage::reclaimSchedulerResources() {
SmallVector<ResourceRef, 8> ResourcesFreed;
HWS.reclaimSimulatedResources(ResourcesFreed);
@ -59,7 +59,6 @@ void ExecuteStage::reclaimSchedulerResources() {
notifyResourceAvailable(RR);
}
// Update the scheduler's instruction queues.
Error ExecuteStage::updateSchedulerQueues() {
SmallVector<InstRef, 4> InstructionIDs;
HWS.updateIssuedSet(InstructionIDs);
@ -77,7 +76,6 @@ Error ExecuteStage::updateSchedulerQueues() {
return ErrorSuccess();
}
// Issue instructions that are waiting in the scheduler's ready queue.
Error ExecuteStage::issueReadyInstructions() {
SmallVector<InstRef, 4> InstructionIDs;
InstRef IR = HWS.select();
@ -145,39 +143,20 @@ Error ExecuteStage::execute(InstRef &IR) {
// be released after MCIS is issued, and all the ResourceCycles for those
// units have been consumed.
const InstrDesc &Desc = IR.getInstruction()->getDesc();
HWS.reserveBuffers(Desc.Buffers);
HWS.dispatch(IR);
notifyReservedBuffers(Desc.Buffers);
// Obtain a slot in the LSU. If we cannot reserve resources, return true, so
// that succeeding stages can make progress.
if (!HWS.reserveResources(IR))
if (!HWS.isReady(IR))
return ErrorSuccess();
// If we did not return early, then the scheduler is ready for execution.
notifyInstructionReady(IR);
// Don't add a zero-latency instruction to the Wait or Ready queue.
// A zero-latency instruction doesn't consume any scheduler resources. That is
// because it doesn't need to be executed, and it is often removed at register
// renaming stage. For example, register-register moves are often optimized at
// register renaming stage by simply updating register aliases. On some
// targets, zero-idiom instructions (for example: a xor that clears the value
// of a register) are treated specially, and are often eliminated at register
// renaming stage.
//
// Instructions that use an in-order dispatch/issue processor resource must be
// issued immediately to the pipeline(s). Any other in-order buffered
// resources (i.e. BufferSize=1) is consumed.
//
// If we cannot issue immediately, the HWS will add IR to its ready queue for
// execution later, so we must return early here.
if (!HWS.issueImmediately(IR))
if (!HWS.mustIssueImmediately(IR))
return ErrorSuccess();
LLVM_DEBUG(dbgs() << "[SCHEDULER] Instruction #" << IR
<< " issued immediately\n");
// Issue IR. The resources for this issuance will be placed in 'Used.'
// Issue IR to the underlying pipelines.
SmallVector<std::pair<ResourceRef, double>, 4> Used;
HWS.issueInstruction(IR, Used);
@ -193,7 +172,6 @@ Error ExecuteStage::execute(InstRef &IR) {
}
void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) {
HWS.onInstructionExecuted(IR);
LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n');
notifyEvent<HWInstructionEvent>(
HWInstructionEvent(HWInstructionEvent::Executed, IR));

View File

@ -51,33 +51,42 @@ void LSUnit::assignSQSlot(unsigned Index) {
StoreQueue.insert(Index);
}
bool LSUnit::reserve(const InstRef &IR) {
void LSUnit::dispatch(const InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
unsigned MayLoad = Desc.MayLoad;
unsigned MayStore = Desc.MayStore;
unsigned IsMemBarrier = Desc.HasSideEffects;
if (!MayLoad && !MayStore)
return false;
assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
const unsigned Index = IR.getSourceIndex();
if (MayLoad) {
if (Desc.MayLoad) {
if (IsMemBarrier)
LoadBarriers.insert(Index);
assignLQSlot(Index);
}
if (MayStore) {
if (Desc.MayStore) {
if (IsMemBarrier)
StoreBarriers.insert(Index);
assignSQSlot(Index);
}
return true;
}
// Reports whether the load/store queues have room for IR.
//
// The load queue is checked first: an instruction that may load is rejected
// with LSU_LQUEUE_FULL when the load queue is full. Otherwise an instruction
// that may store is rejected with LSU_SQUEUE_FULL when the store queue is
// full. Every other case, including instructions that neither load nor store,
// yields LSU_AVAILABLE.
LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
  const InstrDesc &D = IR.getInstruction()->getDesc();

  LSUnit::Status Result = LSUnit::LSU_AVAILABLE;
  if (D.MayLoad && isLQFull())
    Result = LSUnit::LSU_LQUEUE_FULL;
  else if (D.MayStore && isSQFull())
    Result = LSUnit::LSU_SQUEUE_FULL;
  return Result;
}
bool LSUnit::isReady(const InstRef &IR) const {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
const unsigned Index = IR.getSourceIndex();
bool IsALoad = LoadQueue.count(Index) != 0;
bool IsAStore = StoreQueue.count(Index) != 0;
assert((IsALoad || IsAStore) && "Instruction is not in queue!");
bool IsALoad = Desc.MayLoad;
bool IsAStore = Desc.MayStore;
assert((IsALoad || IsAStore) && "Not a memory operation!");
assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
if (IsALoad && !LoadBarriers.empty()) {
unsigned LoadBarrierIndex = *LoadBarriers.begin();

View File

@ -16,6 +16,7 @@
#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H
#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H
#include "HardwareUnit.h"
#include <set>
namespace mca {
@ -86,7 +87,7 @@ struct InstrDesc;
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means, all the older loads/stores have
/// already been executed.
class LSUnit {
class LSUnit : public HardwareUnit {
// Load queue size.
// LQ_Size == 0 means that there are infinite slots in the load queue.
unsigned LQ_Size;
@ -115,6 +116,11 @@ class LSUnit {
// before newer loads are issued.
std::set<unsigned> LoadBarriers;
bool isSQEmpty() const { return StoreQueue.empty(); }
bool isLQEmpty() const { return LoadQueue.empty(); }
bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
public:
LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false)
: LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {}
@ -123,22 +129,30 @@ public:
void dump() const;
#endif
bool isSQEmpty() const { return StoreQueue.empty(); }
bool isLQEmpty() const { return LoadQueue.empty(); }
bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
// Result of an availability query on the load/store queues; see
// isAvailable().
enum Status {
// The load/store queues can accept the instruction.
LSU_AVAILABLE = 0,
// The instruction may load and the load queue is full.
LSU_LQUEUE_FULL,
// The instruction may store and the store queue is full.
LSU_SQUEUE_FULL
};
// Returns true if this instruction has been successfully enqueued.
bool reserve(const InstRef &IR);
// Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
// IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
Status isAvailable(const InstRef &IR) const;
// The rules are:
// Allocates load/store queue resources for IR.
//
// This method assumes that a previous call to `isAvailable(IR)` returned
// LSU_AVAILABLE, and that IR is a memory operation.
void dispatch(const InstRef &IR);
// By default, rules are:
// 1. A store may not pass a previous store.
// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
// 3. A load may pass a previous load.
// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
// 5. A load has to wait until an older load barrier is fully executed.
// 6. A store has to wait until an older store barrier is fully executed.
bool isReady(const InstRef &IR) const;
virtual bool isReady(const InstRef &IR) const;
void onInstructionExecuted(const InstRef &IR);
};

View File

@ -171,7 +171,7 @@ bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
// Returns true if all resources are in-order, and there is at least one
// resource which is a dispatch hazard (BufferSize = 0).
bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) {
bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const {
if (!canBeIssued(Desc))
return false;
bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) {
@ -257,29 +257,29 @@ void Scheduler::dump() const {
}
#endif
bool Scheduler::canBeDispatched(const InstRef &IR,
Scheduler::StallKind &Event) const {
Event = StallKind::NoStall;
Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
// Give lower priority to these stall events.
if (Desc.MayStore && LSU->isSQFull())
Event = StallKind::StoreQueueFull;
if (Desc.MayLoad && LSU->isLQFull())
Event = StallKind::LoadQueueFull;
switch (Resources->canBeDispatched(Desc.Buffers)) {
case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
Event = StallKind::SchedulerQueueFull;
break;
return Scheduler::SC_BUFFERS_FULL;
case ResourceStateEvent::RS_RESERVED:
Event = StallKind::DispatchGroupStall;
break;
default:
return Scheduler::SC_DISPATCH_GROUP_STALL;
case ResourceStateEvent::RS_BUFFER_AVAILABLE:
break;
}
return Event == StallKind::NoStall;
// Give lower priority to LSUnit stall events.
switch (LSU->isAvailable(IR)) {
case LSUnit::LSU_LQUEUE_FULL:
return Scheduler::SC_LOAD_QUEUE_FULL;
case LSUnit::LSU_SQUEUE_FULL:
return Scheduler::SC_STORE_QUEUE_FULL;
case LSUnit::LSU_AVAILABLE:
return Scheduler::SC_AVAILABLE;
}
llvm_unreachable("Don't know how to process this LSU state result!");
}
void Scheduler::issueInstructionImpl(
@ -298,6 +298,8 @@ void Scheduler::issueInstructionImpl(
if (IS->isExecuting())
IssuedSet.emplace_back(IR);
else if (IS->isExecuted())
LSU->onInstructionExecuted(IR);
}
// Release the buffered resources and issue the instruction.
@ -305,7 +307,7 @@ void Scheduler::issueInstruction(
InstRef &IR,
SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
releaseBuffers(Desc.Buffers);
Resources->releaseBuffers(Desc.Buffers);
issueInstructionImpl(IR, UsedResources);
}
@ -324,9 +326,8 @@ void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
if (!IS.isReady())
IS.update();
const InstrDesc &Desc = IS.getDesc();
bool IsMemOp = Desc.MayLoad || Desc.MayStore;
if (!IS.isReady() || (IsMemOp && !LSU->isReady(IR))) {
// Check if there are still unsolved data dependencies.
if (!isReady(IR)) {
++I;
continue;
}
@ -405,6 +406,8 @@ void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
continue;
}
// Instruction IR has completed execution.
LSU->onInstructionExecuted(IR);
Executed.emplace_back(IR);
++RemovedElements;
IR.invalidate();
@ -414,33 +417,51 @@ void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
IssuedSet.resize(IssuedSet.size() - RemovedElements);
}
void Scheduler::onInstructionExecuted(const InstRef &IR) {
LSU->onInstructionExecuted(IR);
}
// Forwards to the resource manager by calling Resources->cycleEvent(Freed).
// The caller supplies the Freed container; ExecuteStage appears to report
// each entry it finds there through notifyResourceAvailable.
void Scheduler::reclaimSimulatedResources(SmallVectorImpl<ResourceRef> &Freed) {
Resources->cycleEvent(Freed);
}
bool Scheduler::reserveResources(InstRef &IR) {
// If necessary, reserve queue entries in the load-store unit (LSU).
const bool Reserved = LSU->reserve(IR);
if (!IR.getInstruction()->isReady() || (Reserved && !LSU->isReady(IR))) {
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
WaitSet.push_back(IR);
return false;
}
return true;
bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
// Instructions that use an in-order dispatch/issue processor resource must be
// issued immediately to the pipeline(s). Any other in-order buffered
// resources (i.e. BufferSize=1) is consumed.
const InstrDesc &Desc = IR.getInstruction()->getDesc();
return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc);
}
bool Scheduler::issueImmediately(InstRef &IR) {
void Scheduler::dispatch(const InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
Resources->reserveBuffers(Desc.Buffers);
// If necessary, reserve queue entries in the load-store unit (LSU).
bool IsMemOp = Desc.MayLoad || Desc.MayStore;
if (IsMemOp)
LSU->dispatch(IR);
if (!isReady(IR)) {
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
WaitSet.push_back(IR);
return;
}
// Don't add a zero-latency instruction to the Ready queue.
// A zero-latency instruction doesn't consume any scheduler resources. That is
// because it doesn't need to be executed, and it is often removed at register
// renaming stage. For example, register-register moves are often optimized at
// register renaming stage by simply updating register aliases. On some
// targets, zero-idiom instructions (for example: a xor that clears the value
// of a register) are treated specially, and are often eliminated at register
// renaming stage.
if (!mustIssueImmediately(IR)) {
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
ReadySet.push_back(IR);
return false;
}
return true;
}
bool Scheduler::isReady(const InstRef &IR) const {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
bool IsMemOp = Desc.MayLoad || Desc.MayStore;
return IR.getInstruction()->isReady() && (!IsMemOp || LSU->isReady(IR));
}
} // namespace mca

View File

@ -321,7 +321,7 @@ public:
// Returns true if all resources are in-order, and there is at least one
// resource which is a dispatch hazard (BufferSize = 0).
bool mustIssueImmediately(const InstrDesc &Desc);
bool mustIssueImmediately(const InstrDesc &Desc) const;
bool canBeIssued(const InstrDesc &Desc) const;
@ -364,10 +364,10 @@ public:
/// leaves the IssuedSet when it reaches the write-back stage.
class Scheduler : public HardwareUnit {
const llvm::MCSchedModel &SM;
LSUnit *LSU;
// Hardware resources that are managed by this scheduler.
std::unique_ptr<ResourceManager> Resources;
std::unique_ptr<LSUnit> LSU;
std::vector<InstRef> WaitSet;
std::vector<InstRef> ReadySet;
@ -379,54 +379,49 @@ class Scheduler : public HardwareUnit {
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
public:
Scheduler(const llvm::MCSchedModel &Model, unsigned LoadQueueSize,
unsigned StoreQueueSize, bool AssumeNoAlias)
: SM(Model), Resources(llvm::make_unique<ResourceManager>(SM)),
LSU(llvm::make_unique<LSUnit>(LoadQueueSize, StoreQueueSize,
AssumeNoAlias)) {}
/// Builds a scheduler for the given scheduling model.
///
/// \p Lsu is stored as a raw pointer and is not owned by the Scheduler; in
/// Context::createDefaultPipeline the LSUnit is handed to the Context through
/// addHardwareUnit. The ResourceManager is created here and owned by the
/// Scheduler.
Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu)
: SM(Model), LSU(Lsu), Resources(llvm::make_unique<ResourceManager>(SM)) {
}
// Stalls generated by the scheduler.
enum StallKind {
NoStall,
LoadQueueFull,
StoreQueueFull,
SchedulerQueueFull,
DispatchGroupStall
enum Status {
SC_AVAILABLE,
SC_LOAD_QUEUE_FULL,
SC_STORE_QUEUE_FULL,
SC_BUFFERS_FULL,
SC_DISPATCH_GROUP_STALL,
};
/// Check if the instruction in 'IR' can be dispatched.
/// Check if the instruction in 'IR' can be dispatched and returns an answer
/// in the form of a Status value.
///
/// The DispatchStage is responsible for querying the Scheduler before
/// dispatching new instructions. This routine is used for performing such
/// a query. If the instruction 'IR' can be dispatched, then true is
/// returned, otherwise false is returned with Event set to the stall type.
bool canBeDispatched(const InstRef &IR, StallKind &Event) const;
/// Internally, it also checks if the load/store unit is available.
Status isAvailable(const InstRef &IR) const;
/// Returns true if there is availibility for IR in the LSU.
bool isReady(const InstRef &IR) const { return LSU->isReady(IR); }
/// Reserves buffer and LSUnit queue resources that are necessary to issue
/// this instruction.
///
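/// This method does not return a value: an instruction that is not ready is
/// added to the wait set, and a ready instruction that does not have to be
/// issued immediately is added to the ready set. Note that this operation
/// cannot fail; it assumes that a previous call to method `isAvailable(IR)`
/// returned `SC_AVAILABLE`.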
void dispatch(const InstRef &IR);
/// Returns true if IR is ready to be executed by the underlying pipelines.
/// This method assumes that IR has been previously dispatched.
bool isReady(const InstRef &IR) const;
/// Issue an instruction. The Used container is populated with
/// the resource objects consumed on behalf of issuing this instruction.
void
issueInstruction(InstRef &IR,
void issueInstruction(InstRef &IR,
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Used);
/// This routine will attempt to issue an instruction immediately (for
/// zero-latency instructions).
///
/// Returns true if the instruction is issued immediately. If this does not
/// occur, then the instruction will be added to the Scheduler's ReadySet.
bool issueImmediately(InstRef &IR);
/// Reserve one entry in each buffered resource.
void reserveBuffers(llvm::ArrayRef<uint64_t> Buffers) {
Resources->reserveBuffers(Buffers);
}
/// Release buffer entries previously allocated by method reserveBuffers.
void releaseBuffers(llvm::ArrayRef<uint64_t> Buffers) {
Resources->releaseBuffers(Buffers);
}
/// Returns true if IR has to be issued immediately, or if IR is a zero
/// latency instruction.
bool mustIssueImmediately(const InstRef &IR) const;
/// Update the resources managed by the scheduler.
/// This routine is to be called at the start of a new cycle, and is
@ -444,21 +439,12 @@ public:
/// Update the issued queue.
void updateIssuedSet(llvm::SmallVectorImpl<InstRef> &Executed);
/// Updates the Scheduler's resources to reflect that an instruction has just
/// been executed.
void onInstructionExecuted(const InstRef &IR);
/// Obtain the processor's resource identifier for the given
/// resource mask.
unsigned getResourceID(uint64_t Mask) {
return Resources->resolveResourceMask(Mask);
}
/// Reserve resources necessary to issue the instruction.
/// Returns true if the resources are ready and the (LSU) can
/// execute the given instruction immediately.
bool reserveResources(InstRef &IR);
/// Select the next instruction to issue from the ReadySet.
/// This method gives priority to older instructions.
InstRef select();