[HLSCpp] eliminate PragmaOps, update ArrayOp definition; [Analysis] refactor Utils; [StoreForward] start of this pass

Hanchen Ye 2020-12-18 23:42:41 -06:00
parent 2d943dd238
commit 117a1bd0f4
16 changed files with 424 additions and 346 deletions

View File

@@ -15,7 +15,7 @@ class Pass;
 namespace mlir {
 namespace scalehls {
-std::unique_ptr<mlir::Pass> createQoREstimationPass();
+std::unique_ptr<Pass> createQoREstimationPass();
 void registerAnalysisPasses();

View File

@@ -19,7 +19,20 @@ class HLSCppAnalysisBase {
 public:
   explicit HLSCppAnalysisBase(OpBuilder builder) : builder(builder) {}

-  OpBuilder builder;
+  /// Get partition information methods.
+  StringRef getPartitionType(hlscpp::ArrayOp op, unsigned dim) {
+    if (auto attr = op.partition_type()[dim].cast<StringAttr>())
+      return attr.getValue();
+    else
+      return "";
+  }
+
+  unsigned getPartitionFactor(hlscpp::ArrayOp op, unsigned dim) {
+    if (auto attr = op.partition_factor()[dim].cast<IntegerAttr>())
+      return attr.getUInt();
+    else
+      return 0;
+  }

   /// Get attribute value methods.
   int32_t getIntAttrValue(Operation *op, StringRef name) {
@@ -50,21 +63,6 @@ public:
     return "";
   }

-  /// Get partition information methods.
-  StringRef getPartitionType(hlscpp::ArrayOp op, unsigned dim) {
-    if (auto attr = op.partition_type()[dim].cast<StringAttr>())
-      return attr.getValue();
-    else
-      return "";
-  }
-
-  unsigned getPartitionFactor(hlscpp::ArrayOp op, unsigned dim) {
-    if (auto attr = op.partition_factor()[dim].cast<IntegerAttr>())
-      return attr.getUInt();
-    else
-      return 0;
-  }
-
   /// Set attribute value methods.
   void setAttrValue(Operation *op, StringRef name, int32_t value) {
     op->setAttr(name, builder.getI32IntegerAttr(value));
@@ -82,46 +80,31 @@ public:
     op->setAttr(name, builder.getStringAttr(value));
   }

-  /// Set schedule attribute methods.
-  void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
-    setAttrValue(op, "schedule_begin", begin);
-    setAttrValue(op, "schedule_end", end);
-  }
+  OpBuilder builder;
 };

 //===----------------------------------------------------------------------===//
-// Common Used Type Declarations
+// Helper methods
 //===----------------------------------------------------------------------===//

-// Profiled latency map.
-using LatencyMap = llvm::StringMap<unsigned>;
-
-// For storing all memory access operations (including AffineLoadOp and
-// AffineStoreOp) indexed by the array instance (ArrayOp).
+// For storing all affine memory access operations (including AffineLoadOp and
+// AffineStoreOp) indexed by the array (ArrayOp).
 using LoadStores = SmallVector<Operation *, 16>;
 using LoadStoresMap = DenseMap<Operation *, LoadStores>;

-// For storing all dependent operations indexed by the source operation.
-using Depends = SmallVector<Operation *, 16>;
-using DependsMap = DenseMap<Operation *, Depends>;
-
-// Indicate the unoccupied memory ports number.
-struct PortInfo {
-  PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
-      : rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
-
-  unsigned rdPort;
-  unsigned wrPort;
-  unsigned rdwrPort;
-};
-
-// For storing ports number of all partitions indexed by the array instance
-// (ArrayOp).
-using Ports = SmallVector<PortInfo, 16>;
-using PortsMap = DenseMap<Operation *, Ports>;
-// For storing PortsMap indexed by the scheduling level.
-using PortsMapDict = DenseMap<unsigned, PortsMap>;
+// Check whether lhsOp and rhsOp are at the same scheduling level. In this
+// check, AffineIfOp is transparent.
+bool checkSameLevel(Operation *lhsOp, Operation *rhsOp);
+
+// Get the pointer of srcOp's parent loop, which is located at the same level
+// as one of dstOp's parent loops.
+Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
+
+/// Get the defining ArrayOp given any memory access operation.
+hlscpp::ArrayOp getArrayOp(Operation *op);
+
+/// Collect all load and store operations in the block.
+void getLoadStoresMap(Block &block, LoadStoresMap &map);

 } // namespace scalehls
 } // namespace mlir
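
For orientation, a minimal sketch of how a pass built on HLSCppAnalysisBase might query the partition attributes through the getters this hunk moves to the top of the class (the `analysis` and `arrayOp` names are placeholders, not from this commit):

    // Sketch only: `analysis` is any HLSCppAnalysisBase subclass and `arrayOp`
    // an hlscpp::ArrayOp whose partition attributes were already set.
    for (unsigned dim = 0, e = arrayOp.getShapedType().getRank(); dim < e; ++dim) {
      StringRef type = analysis.getPartitionType(arrayOp, dim); // "cyclic", "block", ...
      unsigned factor = analysis.getPartitionFactor(arrayOp, dim);
      if (type == "cyclic" && factor > 1)
        llvm::outs() << "dim " << dim << " cyclically partitioned by " << factor << "\n";
    }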

View File

@@ -15,8 +15,8 @@ class Pass;
 namespace mlir {
 namespace scalehls {
-std::unique_ptr<mlir::Pass> createConvertToHLSCppPass();
-std::unique_ptr<mlir::Pass> createHLSKernelToAffinePass();
+std::unique_ptr<Pass> createConvertToHLSCppPass();
+std::unique_ptr<Pass> createHLSKernelToAffinePass();
 void registerConversionPasses();

View File

@@ -26,7 +26,6 @@ class HLSCppOp<string mnemonic, list<OpTrait> traits = []> :
 include "Interfaces.td"
 include "Attributes.td"
-include "PragmaOps.td"
 include "StructureOps.td"

 #endif // SCALEHLS_DIALECT_HLSCPP_HLSCPP_TD

View File

@@ -1,81 +0,0 @@
-//===-------------------------------------------------------*- tablegen -*-===//
-// Deprecated. Will be removed somehow in someday.
-//===----------------------------------------------------------------------===//
-
-#ifndef SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD
-#define SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD
-
-def ArrayPragmaOp : HLSCppOp<"array_pragma", [PragmaOpInterface]> {
-  let summary = "Apply array pragmas";
-  let description = [{
-    This hlscpp.func_pragma operation represent pragmas for arrays, such as
-    array partition, interface, and bind storage pragma.
-  }];
-
-  let arguments = (ins
-    // Targeted array.
-    Type<IsShapedTypePred> : $variable,
-
-    // Interface-related attributes.
-    DefaultValuedAttr<BoolAttr, "false"> : $interface,
-    DefaultValuedAttr<InterfaceModeAttr, "m_axi"> : $interface_mode,
-    DefaultValuedAttr<PositiveUI32Attr, "1024"> : $interface_depth,
-
-    // BindStorage-related attributes.
-    DefaultValuedAttr<BoolAttr, "false"> : $storage,
-    DefaultValuedAttr<StorageTypeAttr, "ram_2p"> : $storage_type,
-    DefaultValuedAttr<StorageImplAttr, "bram"> : $storage_impl,
-
-    // ArrayPartition-related attributes.
-    DefaultValuedAttr<BoolAttr, "false"> : $partition,
-    DefaultValuedAttr<PartitionTypeArrayAttr, "{}"> : $partition_type,
-    DefaultValuedAttr<PositiveUI32ArrayAttr, "{}"> : $partition_factor
-  );
-
-  let assemblyFormat = [{`(` $variable `)` attr-dict `:` type($variable)}];
-
-  let extraClassDeclaration = [{}];
-}
-
-def LoopPragmaOp : HLSCppOp<"loop_pragma", [
-    PragmaOpInterface,
-    HasParent<"AffineForOp">
-  ]> {
-  let summary = "Apply loop pragmas";
-  let description = [{
-    This hlscpp.loop_pragma operation represent pragmas for loops, such as
-    pipeline, and unroll pragma.
-  }];
-
-  let arguments = (ins
-    // Pipeline-related attributes.
-    DefaultValuedAttr<BoolAttr, "false"> : $pipeline,
-    DefaultValuedAttr<PositiveUI32Attr, "1"> : $pipeline_II,
-
-    // Loop-related attributes.
-    DefaultValuedAttr<BoolAttr, "false"> : $flatten,
-    DefaultValuedAttr<BoolAttr, "false"> : $unroll
-  );
-
-  let assemblyFormat = [{attr-dict}];
-
-  let extraClassDeclaration = [{}];
-}
-
-def FuncPragmaOp : HLSCppOp<"func_pragma", [
-    PragmaOpInterface,
-    HasParent<"FuncOp">
-  ]> {
-  let summary = "Apply function pragmas";
-  let description = [{
-    This hlscpp.func_pragma operation represent pragmas for functions, such as
-    pipeline, and dataflow pragma.
-  }];
-
-  let arguments = (ins
-    DefaultValuedAttr<BoolAttr, "false"> : $dataflow
-  );
-
-  let assemblyFormat = [{attr-dict}];
-
-  let extraClassDeclaration = [{}];
-}
-
-#endif // SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD

View File

@@ -47,6 +47,12 @@ def ArrayOp : HLSCppOp<"array", [SameOperandsAndResultType]> {
   );

   let results = (outs Type<IsShapedTypePred> : $output);
+
+  let extraClassDeclaration = [{
+    ShapedType getShapedType() {
+      return getType().cast<ShapedType>();
+    }
+  }];
 }

 def EndOp : HLSCppOp<"end", [Terminator]> {
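
The new accessor mainly shortens a recurring cast; a sketch of the call-site change (the same rewrite appears in the QoREstimation and ArrayPartition hunks below):

    // Before this commit:
    auto shape = arrayOp.getType().cast<ShapedType>().getShape();
    // After, via the extraClassDeclaration above:
    auto shape = arrayOp.getShapedType().getShape();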

View File

@@ -54,12 +54,9 @@ public:
             SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp,
             SignExtendIOp, IndexCastOp, CallOp, ReturnOp,
             // Structure operations.
-            AssignOp, ArrayOp, EndOp,
-            // Pragma operations.
-            LoopPragmaOp, FuncPragmaOp, ArrayPragmaOp>(
-            [&](auto opNode) -> ResultType {
-              return thisCast->visitOp(opNode, args...);
-            })
+            AssignOp, ArrayOp, EndOp>([&](auto opNode) -> ResultType {
+          return thisCast->visitOp(opNode, args...);
+        })
         .Default([&](auto opNode) -> ResultType {
           return thisCast->visitInvalidOp(op, args...);
         });
@@ -191,11 +188,6 @@ public:
   HANDLE(AssignOp);
   HANDLE(ArrayOp);
   HANDLE(EndOp);
-
-  // Pragma operations.
-  HANDLE(LoopPragmaOp);
-  HANDLE(FuncPragmaOp);
-  HANDLE(ArrayPragmaOp);
 #undef HANDLE
 };
} // namespace scalehls } // namespace scalehls

View File

@@ -8,6 +8,8 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Dialect.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/StandardTypes.h"

 namespace mlir {
 namespace scalehls {

View File

@@ -11,6 +11,14 @@ def HLSKernelOpInterface : OpInterface<"HLSKernelOpInterface"> {
   let description = [{
     This interface indicates the operation is an HLS kernel.
   }];
+
+  let methods = [
+    InterfaceMethod<
+      "Return the shaped type of the i-th operand",
+      "ShapedType", "getOperandShapedType", (ins "unsigned" : $i),
+      [{ return $_op.getOperation()->getOperand(i).getType().template cast<ShapedType>(); }]
+    >
+  ];
 }

 #endif // SCALEHLS_DIALECT_HLSKERNEL_INTERFACES_TD
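
A minimal sketch of what the generated interface method gives kernel-level passes (assumes `op` implements HLSKernelOpInterface; the variable names are placeholders):

    // Query the shaped type of operand 0 without casting by hand.
    if (auto kernel = dyn_cast<HLSKernelOpInterface>(op)) {
      ShapedType inputType = kernel.getOperandShapedType(0);
      int64_t rank = inputType.getRank();
      (void)rank;
    }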

View File

@@ -16,21 +16,22 @@ namespace mlir {
 namespace scalehls {

 /// Pragma optimization passes.
-std::unique_ptr<mlir::Pass> createPragmaDSEPass();
-std::unique_ptr<mlir::Pass> createLoopPipeliningPass();
-std::unique_ptr<mlir::Pass> createArrayPartitionPass();
+std::unique_ptr<Pass> createPragmaDSEPass();
+std::unique_ptr<Pass> createLoopPipeliningPass();
+std::unique_ptr<Pass> createArrayPartitionPass();

 /// Loop optimization passes.
-std::unique_ptr<mlir::Pass> createAffineLoopPerfectionPass();
-std::unique_ptr<mlir::Pass> createPartialAffineLoopTilePass();
-std::unique_ptr<mlir::Pass> createRemoveVarLoopBoundPass();
+std::unique_ptr<Pass> createAffineLoopPerfectionPass();
+std::unique_ptr<Pass> createPartialAffineLoopTilePass();
+std::unique_ptr<Pass> createRemoveVarLoopBoundPass();

 /// Dataflow optimization passes.
-std::unique_ptr<mlir::Pass> createSplitFunctionPass();
-std::unique_ptr<mlir::Pass> createLegalizeDataflowPass();
+std::unique_ptr<Pass> createSplitFunctionPass();
+std::unique_ptr<Pass> createLegalizeDataflowPass();

 /// Bufferization passes.
-std::unique_ptr<mlir::Pass> createHLSKernelBufferizePass();
+std::unique_ptr<Pass> createHLSKernelBufferizePass();
+
+std::unique_ptr<Pass> createStoreForwardPass();

 void registerTransformsPasses();

View File

@@ -136,4 +136,14 @@ def HLSKernelBufferize : Pass<"hlskernel-bufferize", "FuncOp"> {
   let constructor = "mlir::scalehls::createHLSKernelBufferizePass()";
 }

+def StoreForward : Pass<"store-forward", "FuncOp"> {
+  let summary = "Forward stores to loads, including conditional stores";
+  let description = [{
+    This store-forward pass is similar to memref-dataflow-opt, but supports
+    forwarding stores inside if statements.
+  }];
+  let constructor = "mlir::scalehls::createStoreForwardPass()";
+}
+
 #endif // SCALEHLS_TRANSFORMS_PASSES_TD
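
A sketch of scheduling the new pass from C++ (assumes an mlir::PassManager `pm` over a module; the FuncOp nesting mirrors the "FuncOp" anchor declared above):

    #include "Transforms/Passes.h"
    #include "mlir/Pass/PassManager.h"

    // Run store forwarding on every function in the module.
    pm.nest<mlir::FuncOp>().addPass(mlir::scalehls::createStoreForwardPass());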

View File

@@ -18,8 +18,10 @@ using namespace mlir;
 using namespace scalehls;
 using namespace hlscpp;

+using LatencyMap = llvm::StringMap<unsigned>;
+
 //===----------------------------------------------------------------------===//
-// HLSCppEstimator Class Delaration
+// HLSCppEstimator Class
 //===----------------------------------------------------------------------===//

 namespace {
@@ -33,7 +35,34 @@ public:
     getFuncMemRefDepends();
   }

+  // Indicate the number of unoccupied memory ports.
+  struct PortInfo {
+    PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
+        : rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
+
+    unsigned rdPort;
+    unsigned wrPort;
+    unsigned rdwrPort;
+  };
+
+  // For storing the number of ports of all partitions indexed by the array
+  // (ArrayOp).
+  using Ports = SmallVector<PortInfo, 16>;
+  using PortsMap = DenseMap<Operation *, Ports>;
+  // For storing PortsMap indexed by the scheduling level.
+  using PortsMapDict = DenseMap<unsigned, PortsMap>;
+
+  // For storing all dependent operations indexed by the source operation.
+  using Depends = SmallVector<Operation *, 16>;
+  using DependsMap = DenseMap<Operation *, Depends>;
+
   void getFuncMemRefDepends();
+
+  void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
+    setAttrValue(op, "schedule_begin", begin);
+    setAttrValue(op, "schedule_end", end);
+  }

   using HLSCppVisitorBase::visitOp;
   Optional<unsigned> visitUnhandledOp(Operation *op, unsigned begin) {
     // Default latency of any unhandled operation is 1.
@@ -83,114 +112,6 @@ public:
 };
 } // namespace

-//===----------------------------------------------------------------------===//
-// Helper methods
-//===----------------------------------------------------------------------===//
-
-// Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
-// AffineIfOp is transparent.
-static bool checkSameLevel(Operation *lhsOp, Operation *rhsOp) {
-  // If lhsOp and rhsOp are already at the same level, return true.
-  if (lhsOp->getBlock() == rhsOp->getBlock())
-    return true;
-
-  // Helper to get all surrounding AffineIfOps.
-  auto getSurroundIfs =
-      ([&](Operation *op, SmallVector<Operation *, 4> &nests) {
-        nests.push_back(op);
-        auto currentOp = op;
-        while (true) {
-          if (auto parentOp = currentOp->getParentOfType<AffineIfOp>()) {
-            nests.push_back(parentOp);
-            currentOp = parentOp;
-          } else
-            break;
-        }
-      });
-
-  SmallVector<Operation *, 4> lhsNests;
-  SmallVector<Operation *, 4> rhsNests;
-
-  getSurroundIfs(lhsOp, lhsNests);
-  getSurroundIfs(rhsOp, rhsNests);
-
-  // If any parent of lhsOp and any parent of rhsOp are at the same level,
-  // return true.
-  for (auto lhs : lhsNests)
-    for (auto rhs : rhsNests)
-      if (lhs->getBlock() == rhs->getBlock())
-        return true;
-
-  return false;
-}
-
-// Get the pointer of the scrOp's parent loop, which should locate at the same
-// level with dstOp's any parent loop.
-static Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
-  // If srcOp and dstOp are already at the same level, return the srcOp.
-  if (checkSameLevel(srcOp, dstOp))
-    return dstOp;
-
-  // Helper to get all surrouding AffineForOps. AffineIfOps are skipped.
-  auto getSurroundFors =
-      ([&](Operation *op, SmallVector<Operation *, 4> &nests) {
-        nests.push_back(op);
-        auto currentOp = op;
-        while (true) {
-          if (auto parentOp = currentOp->getParentOfType<AffineForOp>()) {
-            nests.push_back(parentOp);
-            currentOp = parentOp;
-          } else if (auto parentOp = currentOp->getParentOfType<AffineIfOp>())
-            currentOp = parentOp;
-          else
-            break;
-        }
-      });
-
-  SmallVector<Operation *, 4> srcNests;
-  SmallVector<Operation *, 4> dstNests;
-
-  getSurroundFors(srcOp, srcNests);
-  getSurroundFors(dstOp, dstNests);
-
-  // If any parent of srcOp (or itself) and any parent of dstOp (or itself) are
-  // at the same level, return the pointer.
-  for (auto src : srcNests)
-    for (auto dst : dstNests)
-      if (checkSameLevel(src, dst))
-        return dst;
-
-  return nullptr;
-}
-
-/// Get the definition ArrayOp given any memory access operation.
-static ArrayOp getArrayOp(Operation *op) {
-  auto defOp = MemRefAccess(op).memref.getDefiningOp();
-  assert(defOp && "MemRef is block argument");
-
-  auto arrayOp = dyn_cast<ArrayOp>(defOp);
-  assert(arrayOp && "MemRef is not defined by ArrayOp");
-
-  return arrayOp;
-}
-
-/// Collect all load and store operations in the block.
-static void getLoadStoresMap(Block &block, LoadStoresMap &map) {
-  for (auto &op : block) {
-    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
-      map[getArrayOp(&op)].push_back(&op);
-    else if (op.getNumRegions()) {
-      for (auto &region : op.getRegions())
-        for (auto &block : region)
-          getLoadStoresMap(block, map);
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// HLSCppEstimator Class Definition
-//===----------------------------------------------------------------------===//
-
 /// Collect all dependencies detected in the function.
 void HLSCppEstimator::getFuncMemRefDepends() {
   // TODO: This can be simplified by traversing each ArrayOp in the function.
@@ -258,7 +179,7 @@ int32_t HLSCppEstimator::getPartitionIndex(Operation *op) {
       if (type == "cyclic")
         idxExpr = expr % builder.getAffineConstantExpr(factor);
       else if (type == "block") {
-        auto size = arrayOp.getType().cast<ShapedType>().getShape()[dim];
+        auto size = arrayOp.getShapedType().getShape()[dim];
         idxExpr = expr.floorDiv(
             builder.getAffineConstantExpr((size + factor - 1) / factor));
       }
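
setScheduleValue, now local to the estimator, stamps the schedule as plain attributes; a sketch of the resulting convention (attribute names taken from the code above; `estimator` and `op` are placeholders):

    // Record that op occupies scheduling levels [2, 5).
    estimator.setScheduleValue(op, /*begin=*/2, /*end=*/5);
    // op now carries integer attributes "schedule_begin" = 2 and
    // "schedule_end" = 5, which later queries can read back through the
    // HLSCppAnalysisBase attribute getters.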

lib/Analysis/Utils.cpp (new file, 109 lines)
View File

@@ -0,0 +1,109 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Analysis/Utils.h"
+#include "mlir/Analysis/AffineAnalysis.h"
+
+using namespace mlir;
+using namespace scalehls;
+
+// Check whether lhsOp and rhsOp are at the same scheduling level. In this
+// check, AffineIfOp is transparent.
+bool scalehls::checkSameLevel(Operation *lhsOp, Operation *rhsOp) {
+  // If lhsOp and rhsOp are already at the same level, return true.
+  if (lhsOp->getBlock() == rhsOp->getBlock())
+    return true;
+
+  // Helper to get all surrounding AffineIfOps.
+  auto getSurroundIfs =
+      ([&](Operation *op, SmallVector<Operation *, 4> &nests) {
+        nests.push_back(op);
+        auto currentOp = op;
+        while (true) {
+          if (auto parentOp = currentOp->getParentOfType<AffineIfOp>()) {
+            nests.push_back(parentOp);
+            currentOp = parentOp;
+          } else
+            break;
+        }
+      });
+
+  SmallVector<Operation *, 4> lhsNests;
+  SmallVector<Operation *, 4> rhsNests;
+
+  getSurroundIfs(lhsOp, lhsNests);
+  getSurroundIfs(rhsOp, rhsNests);
+
+  // If any parent of lhsOp and any parent of rhsOp are at the same level,
+  // return true.
+  for (auto lhs : lhsNests)
+    for (auto rhs : rhsNests)
+      if (lhs->getBlock() == rhs->getBlock())
+        return true;
+
+  return false;
+}
+
+// Get the pointer of srcOp's parent loop, which is located at the same level
+// as one of dstOp's parent loops.
+Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
+  // If srcOp and dstOp are already at the same level, return dstOp.
+  if (checkSameLevel(srcOp, dstOp))
+    return dstOp;
+
+  // Helper to get all surrounding AffineForOps. AffineIfOps are skipped.
+  auto getSurroundFors =
+      ([&](Operation *op, SmallVector<Operation *, 4> &nests) {
+        nests.push_back(op);
+        auto currentOp = op;
+        while (true) {
+          if (auto parentOp = currentOp->getParentOfType<AffineForOp>()) {
+            nests.push_back(parentOp);
+            currentOp = parentOp;
+          } else if (auto parentOp = currentOp->getParentOfType<AffineIfOp>())
+            currentOp = parentOp;
+          else
+            break;
+        }
+      });
+
+  SmallVector<Operation *, 4> srcNests;
+  SmallVector<Operation *, 4> dstNests;
+
+  getSurroundFors(srcOp, srcNests);
+  getSurroundFors(dstOp, dstNests);
+
+  // If any parent of srcOp (or itself) and any parent of dstOp (or itself) are
+  // at the same level, return the pointer.
+  for (auto src : srcNests)
+    for (auto dst : dstNests)
+      if (checkSameLevel(src, dst))
+        return dst;
+
+  return nullptr;
+}
+
+/// Get the defining ArrayOp given any memory access operation.
+hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
+  auto defOp = MemRefAccess(op).memref.getDefiningOp();
+  assert(defOp && "MemRef is block argument");
+
+  auto arrayOp = dyn_cast<hlscpp::ArrayOp>(defOp);
+  assert(arrayOp && "MemRef is not defined by ArrayOp");
+
+  return arrayOp;
+}
+
+/// Collect all load and store operations in the block.
+void scalehls::getLoadStoresMap(Block &block, LoadStoresMap &map) {
+  for (auto &op : block) {
+    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
+      map[getArrayOp(&op)].push_back(&op);
+    else if (op.getNumRegions()) {
+      for (auto &region : op.getRegions())
+        for (auto &block : region)
+          getLoadStoresMap(block, map);
+    }
+  }
+}
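
A usage sketch for the moved helpers (assumes `func` is a single-region FuncOp, as elsewhere in this commit; the names are placeholders):

    // Group every affine load/store in the function body by its defining ArrayOp.
    LoadStoresMap map;
    getLoadStoresMap(func.front(), map);
    for (auto &pair : map) {
      auto arrayOp = cast<hlscpp::ArrayOp>(pair.first);
      llvm::outs() << "array has " << pair.second.size() << " accesses\n";
      (void)arrayOp;
    }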

View File

@@ -201,11 +201,6 @@ public:
   void emitAssign(AssignOp *op);
   void emitArray(ArrayOp *op);

-  /// Pragma operation emitters.
-  void emitLoopPragma(LoopPragmaOp *op);
-  void emitFuncPragma(FuncPragmaOp *op);
-  void emitArrayPragma(ArrayPragmaOp *op);
-
   /// Top-level MLIR module emitter.
   void emitModule(ModuleOp module);
@@ -501,11 +496,6 @@ public:
   bool visitOp(ArrayOp op) { return emitter.emitArray(&op), true; }
   bool visitOp(EndOp op) { return true; }

-  /// Pragma operations.
-  bool visitOp(LoopPragmaOp op) { return emitter.emitLoopPragma(&op), true; }
-  bool visitOp(FuncPragmaOp op) { return emitter.emitFuncPragma(&op), true; }
-  bool visitOp(ArrayPragmaOp op) { return emitter.emitArrayPragma(&op), true; }
-
 private:
   ModuleEmitter &emitter;
 };
@@ -1287,80 +1277,6 @@ void ModuleEmitter::emitArray(ArrayOp *op) {
   os << "\n";
 }

-/// Pragma operation emitters. (deprecated)
-void ModuleEmitter::emitLoopPragma(LoopPragmaOp *op) {
-  indent();
-  os << "#pragma HLS pipeline";
-  if (op->pipeline())
-    os << " II=" << op->pipeline_II();
-  else
-    os << " off\n";
-
-  if (op->unroll()) {
-    indent();
-    os << "#pragma HLS unroll\n";
-  }
-
-  // An empty line.
-  os << "\n";
-}
-
-void ModuleEmitter::emitFuncPragma(FuncPragmaOp *op) {
-  if (op->dataflow()) {
-    indent();
-    os << "#pragma HLS dataflow\n";
-
-    // An empty line.
-    os << "\n";
-  }
-}
-
-void ModuleEmitter::emitArrayPragma(ArrayPragmaOp *op) {
-  if (op->interface()) {
-    // Emit interface pragma.
-    indent();
-    os << "#pragma HLS interface";
-    os << " " << op->interface_mode();
-    os << " port=";
-    emitValue(op->getOperand());
-    if (op->interface_mode() == "m_axi") {
-      os << " depth=" << op->interface_depth();
-      os << " offset=slave\n";
-    } else
-      os << " storage_type=" << op->storage_type() << "\n";
-  } else {
-    // Emit bind_storage pragma.
-    indent();
-    os << "#pragma HLS bind_storage";
-    os << " variable=";
-    emitValue(op->getOperand());
-    os << " type=" << op->storage_type();
-    os << " impl=" << op->storage_impl() << "\n";
-  }
-
-  auto type = op->getOperand().getType().cast<ShapedType>();
-  if (op->partition() && type.hasStaticShape()) {
-    // Emit array_partition pragma(s).
-    for (unsigned dim = 0; dim < type.getRank(); ++dim) {
-      indent();
-      os << "#pragma HLS array_partition";
-      os << " variable=";
-      emitValue(op->getOperand());
-      auto partitionType =
-          op->partition_type()[dim].cast<StringAttr>().getValue();
-      os << " " << partitionType;
-      if (partitionType != "complete")
-        os << " factor="
-           << op->partition_factor()[dim].cast<IntegerAttr>().getUInt();
-      os << " dim=" << dim + 1 << "\n";
-    }
-  }
-
-  os << "\n";
-}
-
 /// C++ component emitters.
 void ModuleEmitter::emitValue(Value val, unsigned rank, bool isPtr) {
   assert(!(rank && isPtr) && "should be either an array or a pointer.");

View File

@@ -35,7 +35,7 @@ template <typename OpType>
 static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) {
   for (auto pair : map) {
     auto arrayOp = cast<ArrayOp>(pair.first);
-    auto arrayType = arrayOp.getType().cast<MemRefType>();
+    auto arrayShape = arrayOp.getShapedType().getShape();
     auto arrayAccesses = pair.second;

     // Walk through each dimension of the targeted array.
@@ -43,7 +43,7 @@ static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) {
     SmallVector<StringRef, 4> partitionType;
     unsigned partitionNum = 1;

-    for (size_t dim = 0, e = arrayType.getShape().size(); dim < e; ++dim) {
+    for (size_t dim = 0, e = arrayShape.size(); dim < e; ++dim) {
       // Collect all array access indices of the current dimension.
       SmallVector<AffineExpr, 4> indices;
       for (auto accessOp : arrayAccesses) {

View File

@@ -0,0 +1,212 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Analysis/Utils.h"
+#include "Transforms/Passes.h"
+#include "mlir/Analysis/AffineAnalysis.h"
+#include "mlir/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Dominance.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+
+using namespace mlir;
+using namespace scalehls;
+
+namespace {
+// The store to load forwarding relies on three conditions:
+//
+// 1) they need to have mathematically equivalent affine access functions
+// (checked after full composition of load/store operands); this implies that
+// they access the same single memref element for all iterations of the common
+// surrounding loop,
+//
+// 2) the store op should dominate the load op,
+//
+// 3) among all op's that satisfy both (1) and (2), the one that postdominates
+// all store op's that have a dependence into the load, is provably the last
+// writer to the particular memref location being loaded at the load op, and
+// its store value can be forwarded to the load. Note that the only dependences
+// that are to be considered are those that are satisfied at the block* of the
+// innermost common surrounding loop of the <store, load> being considered.
+//
+// (* A dependence being satisfied at a block: a dependence that is satisfied
+// by virtue of the destination operation appearing textually / lexically after
+// the source operation within the body of an 'affine.for' operation; thus, a
+// dependence is always either satisfied by a loop or by a block).
+//
+// The above conditions are simple to check, sufficient, and powerful for most
+// cases in practice - they are sufficient, but not necessary --- since they
+// don't reason about loops that are guaranteed to execute at least once or
+// multiple sources to forward from.
+//
+// TODO: more forwarding can be done when support for
+// loop/conditional live-out SSA values is available.
+// TODO: do general dead store elimination for memref's. This pass
+// currently only eliminates the stores only if no other loads/uses (other
+// than dealloc) remain.
+//
+struct StoreForward : public StoreForwardBase<StoreForward> {
+  void runOnOperation() override;
+
+  void forwardStoreToLoad(AffineReadOpInterface loadOp);
+
+  // A list of memref's that are potentially dead / could be eliminated.
+  SmallPtrSet<Value, 4> memrefsToErase;
+  // Load op's whose results were replaced by those forwarded from stores.
+  SmallVector<Operation *, 8> loadOpsToErase;
+
+  DominanceInfo *domInfo = nullptr;
+  PostDominanceInfo *postDomInfo = nullptr;
+};
+} // end anonymous namespace
+
+/// Creates a pass to perform optimizations relying on memref dataflow such as
+/// store to load forwarding, elimination of dead stores, and dead allocs.
+std::unique_ptr<Pass> scalehls::createStoreForwardPass() {
+  return std::make_unique<StoreForward>();
+}
+
+// This is a straightforward implementation not optimized for speed. Optimize
+// if needed.
+void StoreForward::forwardStoreToLoad(AffineReadOpInterface loadOp) {
+  // First pass over the use list to get the minimum number of surrounding
+  // loops common between the load op and the store op, with min taken across
+  // all store ops.
+  SmallVector<Operation *, 8> storeOps;
+  unsigned minSurroundingLoops = getNestingDepth(loadOp);
+  for (auto *user : loadOp.getMemRef().getUsers()) {
+    auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
+    if (!storeOp)
+      continue;
+    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
+    minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
+    storeOps.push_back(storeOp);
+  }
+
+  // The list of store op candidates for forwarding that satisfy conditions
+  // (1) and (2) above - they will be filtered later when checking (3).
+  SmallVector<Operation *, 8> fwdingCandidates;
+
+  // Store ops that have a dependence into the load (even if they aren't
+  // forwarding candidates). Each forwarding candidate will be checked for a
+  // post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
+  SmallVector<Operation *, 8> depSrcStores;
+
+  for (auto *storeOp : storeOps) {
+    MemRefAccess srcAccess(storeOp);
+    MemRefAccess destAccess(loadOp);
+
+    // Find stores that may be reaching the load.
+    FlatAffineConstraints dependenceConstraints;
+    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
+    unsigned d;
+    // Dependences at loop depth <= minSurroundingLoops do NOT matter.
+    for (d = nsLoops + 1; d > minSurroundingLoops; d--) {
+      DependenceResult result = checkMemrefAccessDependence(
+          srcAccess, destAccess, d, &dependenceConstraints,
+          /*dependenceComponents=*/nullptr);
+      if (hasDependence(result))
+        break;
+    }
+    if (d == minSurroundingLoops)
+      continue;
+
+    // Stores that *may* be reaching the load.
+    depSrcStores.push_back(storeOp);
+
+    // 1. Check if the store and the load have mathematically equivalent
+    // affine access functions; this implies that they statically refer to the
+    // same single memref element. As an example this filters out cases like:
+    //     store %A[%i0 + 1]
+    //     load %A[%i0]
+    //     store %A[%M]
+    //     load %A[%N]
+    // Use the AffineValueMap difference based memref access equality checking.
+    if (srcAccess != destAccess)
+      continue;
+
+    // 2. The store has to dominate the load op to be a candidate.
+    if (!domInfo->dominates(storeOp, loadOp)) {
+      llvm::outs() << *loadOp.getOperation() << "\n";
+      llvm::outs() << *storeOp << "\n";
+      llvm::outs() << "does not dominate\n";
+      continue;
+    }
+
+    // We now have a candidate for forwarding.
+    fwdingCandidates.push_back(storeOp);
+  }
+
+  // 3. Of all the store op's that meet the above criteria, the store that
+  // postdominates all 'depSrcStores' (if one exists) is the unique store
+  // providing the value to the load, i.e., provably the last writer to that
+  // memref loc.
+  // Note: this can be implemented in a cleaner way with postdominator tree
+  // traversals. Consider this for the future if needed.
+  Operation *lastWriteStoreOp = nullptr;
+  for (auto *storeOp : fwdingCandidates) {
+    if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
+          return postDomInfo->postDominates(storeOp, depStore);
+        })) {
+      lastWriteStoreOp = storeOp;
+      break;
+    }
+  }
+  if (!lastWriteStoreOp)
+    return;
+
+  // Perform the actual store to load forwarding.
+  Value storeVal =
+      cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
+  loadOp.getValue().replaceAllUsesWith(storeVal);
+  // Record the memref for a later sweep to optimize away.
+  memrefsToErase.insert(loadOp.getMemRef());
+  // Record this to erase later.
+  loadOpsToErase.push_back(loadOp);
+}
+
+void StoreForward::runOnOperation() {
+  // Only supports single block functions at the moment.
+  FuncOp f = getOperation();
+  if (!llvm::hasSingleElement(f)) {
+    markAllAnalysesPreserved();
+    return;
+  }
+
+  domInfo = &getAnalysis<DominanceInfo>();
+  postDomInfo = &getAnalysis<PostDominanceInfo>();
+
+  loadOpsToErase.clear();
+  memrefsToErase.clear();
+
+  // Walk all loads and perform store to load forwarding.
+  f.walk([&](AffineReadOpInterface loadOp) { forwardStoreToLoad(loadOp); });
+
+  // Erase all load op's whose results were replaced with store fwd'ed ones.
+  for (auto *loadOp : loadOpsToErase)
+    loadOp->erase();
+
+  // Check if the store fwd'ed memrefs are now left with only stores and can
+  // thus be completely deleted. Note: the canonicalize pass should be able
+  // to do this as well, but we'll do it here since we collected these anyway.
+  for (auto memref : memrefsToErase) {
+    // If the memref hasn't been alloc'ed in this function, skip.
+    Operation *defOp = memref.getDefiningOp();
+    if (!defOp || !isa<AllocOp>(defOp))
+      // TODO: if the memref was returned by a 'call' operation, we
+      // could still erase it if the call had no side-effects.
+      continue;
+    if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
+          return !isa<AffineWriteOpInterface, DeallocOp>(ownerOp);
+        }))
+      continue;
+
+    // Erase all stores, the dealloc, and the alloc on the memref.
+    for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
+      user->erase();
+    defOp->erase();
+  }
+}