[HLSCpp] eliminate PragmaOps, update ArrayOp definition; [Analysis] refactor Utils; [StoreForward] start of this pass
This commit is contained in:
parent
2d943dd238
commit
117a1bd0f4
|
@ -15,7 +15,7 @@ class Pass;
|
|||
namespace mlir {
|
||||
namespace scalehls {
|
||||
|
||||
std::unique_ptr<mlir::Pass> createQoREstimationPass();
|
||||
std::unique_ptr<Pass> createQoREstimationPass();
|
||||
|
||||
void registerAnalysisPasses();
|
||||
|
||||
|
|
|
@ -19,7 +19,20 @@ class HLSCppAnalysisBase {
|
|||
public:
|
||||
explicit HLSCppAnalysisBase(OpBuilder builder) : builder(builder) {}
|
||||
|
||||
OpBuilder builder;
|
||||
/// Get partition information methods.
|
||||
/// Return the partition type string stored at index `dim` of the
/// `partition_type` attribute of `op`.
///
/// Returns an empty string when `dim` is past the end of the attribute or when
/// the entry at `dim` is not a StringAttr.
StringRef getPartitionType(hlscpp::ArrayOp op, unsigned dim) {
  auto types = op.partition_type();
  // Guard the index so a missing entry yields the documented fallback.
  if (dim < types.size())
    // dyn_cast<> returns a null attribute on a type mismatch, which makes the
    // fallback below reachable; cast<> would never produce a null value.
    if (auto attr = types[dim].dyn_cast<StringAttr>())
      return attr.getValue();
  return "";
}
|
||||
|
||||
/// Return the partition factor stored at index `dim` of the
/// `partition_factor` attribute of `op`.
///
/// Returns 0 when `dim` is past the end of the attribute or when the entry at
/// `dim` is not an IntegerAttr.
unsigned getPartitionFactor(hlscpp::ArrayOp op, unsigned dim) {
  auto factors = op.partition_factor();
  // Guard the index so a missing entry yields the documented fallback.
  if (dim < factors.size())
    // dyn_cast<> returns a null attribute on a type mismatch, which makes the
    // fallback below reachable; cast<> would never produce a null value.
    if (auto attr = factors[dim].dyn_cast<IntegerAttr>())
      return attr.getUInt();
  return 0;
}
|
||||
|
||||
/// Get attribute value methods.
|
||||
int32_t getIntAttrValue(Operation *op, StringRef name) {
|
||||
|
@ -50,21 +63,6 @@ public:
|
|||
return "";
|
||||
}
|
||||
|
||||
/// Get partition information methods.
|
||||
/// Return the partition type string stored at index `dim` of the
/// `partition_type` attribute of `op`.
///
/// Returns an empty string when `dim` is past the end of the attribute or when
/// the entry at `dim` is not a StringAttr.
StringRef getPartitionType(hlscpp::ArrayOp op, unsigned dim) {
  auto types = op.partition_type();
  // Guard the index so a missing entry yields the documented fallback.
  if (dim < types.size())
    // dyn_cast<> returns a null attribute on a type mismatch, which makes the
    // fallback below reachable; cast<> would never produce a null value.
    if (auto attr = types[dim].dyn_cast<StringAttr>())
      return attr.getValue();
  return "";
}
|
||||
|
||||
/// Return the partition factor stored at index `dim` of the
/// `partition_factor` attribute of `op`.
///
/// Returns 0 when `dim` is past the end of the attribute or when the entry at
/// `dim` is not an IntegerAttr.
unsigned getPartitionFactor(hlscpp::ArrayOp op, unsigned dim) {
  auto factors = op.partition_factor();
  // Guard the index so a missing entry yields the documented fallback.
  if (dim < factors.size())
    // dyn_cast<> returns a null attribute on a type mismatch, which makes the
    // fallback below reachable; cast<> would never produce a null value.
    if (auto attr = factors[dim].dyn_cast<IntegerAttr>())
      return attr.getUInt();
  return 0;
}
|
||||
|
||||
/// Set attribute value methods.
|
||||
void setAttrValue(Operation *op, StringRef name, int32_t value) {
|
||||
op->setAttr(name, builder.getI32IntegerAttr(value));
|
||||
|
@ -82,46 +80,31 @@ public:
|
|||
op->setAttr(name, builder.getStringAttr(value));
|
||||
}
|
||||
|
||||
/// Set schedule attribute methods.
|
||||
/// Record a schedule interval on `op` by writing `begin` to the
/// "schedule_begin" attribute and `end` to the "schedule_end" attribute.
void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
  // Both values go through setAttrValue, begin first.
  setAttrValue(op, "schedule_begin", begin);
  setAttrValue(op, "schedule_end", end);
}
|
||||
OpBuilder builder;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Common Used Type Declarations
|
||||
// Helper methods
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Profiled latency map.
|
||||
using LatencyMap = llvm::StringMap<unsigned>;
|
||||
|
||||
// For storing all memory access operations (including AffineLoadOp and
|
||||
// AffineStoreOp) indexed by the array instance (ArrayOp).
|
||||
// For storing all affine memory access operations (including AffineLoadOp and
|
||||
// AffineStoreOp) indexed by the array (ArrayOp).
|
||||
using LoadStores = SmallVector<Operation *, 16>;
|
||||
using LoadStoresMap = DenseMap<Operation *, LoadStores>;
|
||||
|
||||
// For storing all dependent operations indexed by the source operation.
|
||||
using Depends = SmallVector<Operation *, 16>;
|
||||
using DependsMap = DenseMap<Operation *, Depends>;
|
||||
// Check if the lhsOp and rhsOp are at the same scheduling level. In this check,
|
||||
// AffineIfOp is transparent.
|
||||
bool checkSameLevel(Operation *lhsOp, Operation *rhsOp);
|
||||
|
||||
// Indicate the unoccupied memory ports number.
|
||||
struct PortInfo {
|
||||
PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
|
||||
: rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
|
||||
// Get the pointer of the srcOp's parent loop, which should be located at the same
|
||||
// level with dstOp's any parent loop.
|
||||
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
|
||||
|
||||
unsigned rdPort;
|
||||
unsigned wrPort;
|
||||
unsigned rdwrPort;
|
||||
};
|
||||
/// Get the definition ArrayOp given any memory access operation.
|
||||
hlscpp::ArrayOp getArrayOp(Operation *op);
|
||||
|
||||
// For storing ports number of all partitions indexed by the array instance
|
||||
// (ArrayOp).
|
||||
using Ports = SmallVector<PortInfo, 16>;
|
||||
using PortsMap = DenseMap<Operation *, Ports>;
|
||||
|
||||
// For storing PortsMap indexed by the scheduling level.
|
||||
using PortsMapDict = DenseMap<unsigned, PortsMap>;
|
||||
/// Collect all load and store operations in the block.
|
||||
void getLoadStoresMap(Block &block, LoadStoresMap &map);
|
||||
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
|
|
@ -15,8 +15,8 @@ class Pass;
|
|||
namespace mlir {
|
||||
namespace scalehls {
|
||||
|
||||
std::unique_ptr<mlir::Pass> createConvertToHLSCppPass();
|
||||
std::unique_ptr<mlir::Pass> createHLSKernelToAffinePass();
|
||||
std::unique_ptr<Pass> createConvertToHLSCppPass();
|
||||
std::unique_ptr<Pass> createHLSKernelToAffinePass();
|
||||
|
||||
void registerConversionPasses();
|
||||
|
||||
|
|
|
@ -26,7 +26,6 @@ class HLSCppOp<string mnemonic, list<OpTrait> traits = []> :
|
|||
include "Interfaces.td"
|
||||
include "Attributes.td"
|
||||
|
||||
include "PragmaOps.td"
|
||||
include "StructureOps.td"
|
||||
|
||||
#endif // SCALEHLS_DIALECT_HLSCPP_HLSCPP_TD
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
//===-------------------------------------------------------*- tablegen -*-===//
|
||||
// Deprecated. Will be removed in a future revision.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD
|
||||
#define SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD
|
||||
|
||||
def ArrayPragmaOp : HLSCppOp<"array_pragma", [PragmaOpInterface]> {
|
||||
let summary = "Apply array pragmas";
|
||||
let description = [{
|
||||
This hlscpp.array_pragma operation represents pragmas for arrays, such as
|
||||
array partition, interface, and bind storage pragma.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
// Targeted array.
|
||||
Type<IsShapedTypePred> : $variable,
|
||||
|
||||
// Interface-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $interface,
|
||||
DefaultValuedAttr<InterfaceModeAttr, "m_axi"> : $interface_mode,
|
||||
DefaultValuedAttr<PositiveUI32Attr, "1024"> : $interface_depth,
|
||||
|
||||
// BindStorage-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $storage,
|
||||
DefaultValuedAttr<StorageTypeAttr, "ram_2p"> : $storage_type,
|
||||
DefaultValuedAttr<StorageImplAttr, "bram"> : $storage_impl,
|
||||
|
||||
// ArrayPartition-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $partition,
|
||||
DefaultValuedAttr<PartitionTypeArrayAttr, "{}"> : $partition_type,
|
||||
DefaultValuedAttr<PositiveUI32ArrayAttr, "{}"> : $partition_factor
|
||||
);
|
||||
|
||||
let assemblyFormat = [{`(` $variable `)` attr-dict `:` type($variable)}];
|
||||
let extraClassDeclaration = [{}];
|
||||
}
|
||||
|
||||
def LoopPragmaOp : HLSCppOp<"loop_pragma", [
|
||||
PragmaOpInterface,
|
||||
HasParent<"AffineForOp">
|
||||
]> {
|
||||
let summary = "Apply loop pragmas";
|
||||
let description = [{
|
||||
This hlscpp.loop_pragma operation represent pragmas for loops, such as
|
||||
pipeline, and unroll pragma.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
// Pipeline-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $pipeline,
|
||||
DefaultValuedAttr<PositiveUI32Attr, "1"> : $pipeline_II,
|
||||
|
||||
// Loop-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $flatten,
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $unroll
|
||||
);
|
||||
|
||||
let assemblyFormat = [{attr-dict}];
|
||||
let extraClassDeclaration = [{}];
|
||||
}
|
||||
|
||||
def FuncPragmaOp : HLSCppOp<"func_pragma", [
|
||||
PragmaOpInterface,
|
||||
HasParent<"FuncOp">
|
||||
]> {
|
||||
let summary = "Apply function pragmas";
|
||||
let description = [{
|
||||
This hlscpp.func_pragma operation represent pragmas for functions, such as
|
||||
pipeline, and dataflow pragma.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $dataflow
|
||||
);
|
||||
|
||||
let assemblyFormat = [{attr-dict}];
|
||||
let extraClassDeclaration = [{}];
|
||||
}
|
||||
|
||||
#endif // SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD
|
|
@ -47,6 +47,12 @@ def ArrayOp : HLSCppOp<"array", [SameOperandsAndResultType]> {
|
|||
);
|
||||
|
||||
let results = (outs Type<IsShapedTypePred> : $output);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
ShapedType getShapedType() {
|
||||
return getType().cast<ShapedType>();
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
def EndOp : HLSCppOp<"end", [Terminator]> {
|
||||
|
|
|
@ -54,12 +54,9 @@ public:
|
|||
SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp,
|
||||
SignExtendIOp, IndexCastOp, CallOp, ReturnOp,
|
||||
// Structure operations.
|
||||
AssignOp, ArrayOp, EndOp,
|
||||
// Pragma operations.
|
||||
LoopPragmaOp, FuncPragmaOp, ArrayPragmaOp>(
|
||||
[&](auto opNode) -> ResultType {
|
||||
return thisCast->visitOp(opNode, args...);
|
||||
})
|
||||
AssignOp, ArrayOp, EndOp>([&](auto opNode) -> ResultType {
|
||||
return thisCast->visitOp(opNode, args...);
|
||||
})
|
||||
.Default([&](auto opNode) -> ResultType {
|
||||
return thisCast->visitInvalidOp(op, args...);
|
||||
});
|
||||
|
@ -191,11 +188,6 @@ public:
|
|||
HANDLE(AssignOp);
|
||||
HANDLE(ArrayOp);
|
||||
HANDLE(EndOp);
|
||||
|
||||
// Pragma operations.
|
||||
HANDLE(LoopPragmaOp);
|
||||
HANDLE(FuncPragmaOp);
|
||||
HANDLE(ArrayPragmaOp);
|
||||
#undef HANDLE
|
||||
};
|
||||
} // namespace scalehls
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/Dialect.h"
|
||||
#include "mlir/IR/OpDefinition.h"
|
||||
#include "mlir/IR/StandardTypes.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace scalehls {
|
||||
|
|
|
@ -11,6 +11,14 @@ def HLSKernelOpInterface : OpInterface<"HLSKernelOpInterface"> {
|
|||
let description = [{
|
||||
This interface indicates the operation is an HLS kernel.
|
||||
}];
|
||||
|
||||
let methods = [
|
||||
InterfaceMethod<
|
||||
"Return the shaped type of the i-th operand",
|
||||
"ShapedType", "getOperandShapedType", (ins "unsigned" : $i),
|
||||
[{ return $_op.getOperation()->getOperand(i).getType().template cast<ShapedType>(); }]
|
||||
>
|
||||
];
|
||||
}
|
||||
|
||||
#endif // SCALEHLS_DIALECT_HLSKERNEL_INTERFACES_TD
|
||||
|
|
|
@ -16,21 +16,22 @@ namespace mlir {
|
|||
namespace scalehls {
|
||||
|
||||
/// Pragma optimization passes.
|
||||
std::unique_ptr<mlir::Pass> createPragmaDSEPass();
|
||||
std::unique_ptr<mlir::Pass> createLoopPipeliningPass();
|
||||
std::unique_ptr<mlir::Pass> createArrayPartitionPass();
|
||||
std::unique_ptr<Pass> createPragmaDSEPass();
|
||||
std::unique_ptr<Pass> createLoopPipeliningPass();
|
||||
std::unique_ptr<Pass> createArrayPartitionPass();
|
||||
|
||||
/// Loop optimization passes.
|
||||
std::unique_ptr<mlir::Pass> createAffineLoopPerfectionPass();
|
||||
std::unique_ptr<mlir::Pass> createPartialAffineLoopTilePass();
|
||||
std::unique_ptr<mlir::Pass> createRemoveVarLoopBoundPass();
|
||||
std::unique_ptr<Pass> createAffineLoopPerfectionPass();
|
||||
std::unique_ptr<Pass> createPartialAffineLoopTilePass();
|
||||
std::unique_ptr<Pass> createRemoveVarLoopBoundPass();
|
||||
|
||||
/// Dataflow optimization passes.
|
||||
std::unique_ptr<mlir::Pass> createSplitFunctionPass();
|
||||
std::unique_ptr<mlir::Pass> createLegalizeDataflowPass();
|
||||
std::unique_ptr<Pass> createSplitFunctionPass();
|
||||
std::unique_ptr<Pass> createLegalizeDataflowPass();
|
||||
|
||||
/// Bufferization passes.
|
||||
std::unique_ptr<mlir::Pass> createHLSKernelBufferizePass();
|
||||
std::unique_ptr<Pass> createHLSKernelBufferizePass();
|
||||
std::unique_ptr<Pass> createStoreForwardPass();
|
||||
|
||||
void registerTransformsPasses();
|
||||
|
||||
|
|
|
@ -136,4 +136,14 @@ def HLSKernelBufferize : Pass<"hlskernel-bufferize", "FuncOp"> {
|
|||
let constructor = "mlir::scalehls::createHLSKernelBufferizePass()";
|
||||
}
|
||||
|
||||
def StoreForward : Pass<"store-forward", "FuncOp"> {
|
||||
let summary = "Forward store to load, including conditional stores";
|
||||
let description = [{
|
||||
This store-forward pass is similar to memref-dataflow-opt, but also supports
|
||||
forwarding stores inside if statements.
|
||||
}];
|
||||
|
||||
let constructor = "mlir::scalehls::createStoreForwardPass()";
|
||||
}
|
||||
|
||||
#endif // SCALEHLS_TRANSFORMS_PASSES_TD
|
||||
|
|
|
@ -18,8 +18,10 @@ using namespace mlir;
|
|||
using namespace scalehls;
|
||||
using namespace hlscpp;
|
||||
|
||||
using LatencyMap = llvm::StringMap<unsigned>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HLSCppEstimator Class Declaration
|
||||
// HLSCppEstimator Class
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
|
||||
|
@ -33,7 +35,34 @@ public:
|
|||
getFuncMemRefDepends();
|
||||
}
|
||||
|
||||
// Number of unoccupied memory ports, tracked separately for read, write and
// read-write ports. Every count defaults to zero.
struct PortInfo {
  unsigned rdPort;
  unsigned wrPort;
  unsigned rdwrPort;

  PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
      : rdPort{rdPort}, wrPort{wrPort}, rdwrPort{rdwrPort} {}
};
|
||||
|
||||
// For storing ports number of all partitions indexed by the array (ArrayOp).
|
||||
using Ports = SmallVector<PortInfo, 16>;
|
||||
using PortsMap = DenseMap<Operation *, Ports>;
|
||||
|
||||
// For storing PortsMap indexed by the scheduling level.
|
||||
using PortsMapDict = DenseMap<unsigned, PortsMap>;
|
||||
|
||||
// For storing all dependent operations indexed by the source operation.
|
||||
using Depends = SmallVector<Operation *, 16>;
|
||||
using DependsMap = DenseMap<Operation *, Depends>;
|
||||
|
||||
void getFuncMemRefDepends();
|
||||
|
||||
/// Record a schedule interval on `op` by writing `begin` to the
/// "schedule_begin" attribute and `end` to the "schedule_end" attribute.
void setScheduleValue(Operation *op, unsigned begin, unsigned end) {
  // Both values go through setAttrValue, begin first.
  setAttrValue(op, "schedule_begin", begin);
  setAttrValue(op, "schedule_end", end);
}
|
||||
|
||||
using HLSCppVisitorBase::visitOp;
|
||||
Optional<unsigned> visitUnhandledOp(Operation *op, unsigned begin) {
|
||||
// Default latency of any unhandled operation is 1.
|
||||
|
@ -83,114 +112,6 @@ public:
|
|||
};
|
||||
} // namespace
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helper methods
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
|
||||
// AffineIfOp is transparent.
|
||||
static bool checkSameLevel(Operation *lhsOp, Operation *rhsOp) {
|
||||
// If lhsOp and rhsOp are already at the same level, return true.
|
||||
if (lhsOp->getBlock() == rhsOp->getBlock())
|
||||
return true;
|
||||
|
||||
// Helper to get all surrounding AffineIfOps.
|
||||
auto getSurroundIfs =
|
||||
([&](Operation *op, SmallVector<Operation *, 4> &nests) {
|
||||
nests.push_back(op);
|
||||
auto currentOp = op;
|
||||
while (true) {
|
||||
if (auto parentOp = currentOp->getParentOfType<AffineIfOp>()) {
|
||||
nests.push_back(parentOp);
|
||||
currentOp = parentOp;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
});
|
||||
|
||||
SmallVector<Operation *, 4> lhsNests;
|
||||
SmallVector<Operation *, 4> rhsNests;
|
||||
|
||||
getSurroundIfs(lhsOp, lhsNests);
|
||||
getSurroundIfs(rhsOp, rhsNests);
|
||||
|
||||
// If any parent of lhsOp and any parent of rhsOp are at the same level,
|
||||
// return true.
|
||||
for (auto lhs : lhsNests)
|
||||
for (auto rhs : rhsNests)
|
||||
if (lhs->getBlock() == rhs->getBlock())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the pointer of the scrOp's parent loop, which should locate at the same
|
||||
// level with dstOp's any parent loop.
|
||||
static Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
|
||||
// If srcOp and dstOp are already at the same level, return the srcOp.
|
||||
if (checkSameLevel(srcOp, dstOp))
|
||||
return dstOp;
|
||||
|
||||
// Helper to get all surrouding AffineForOps. AffineIfOps are skipped.
|
||||
auto getSurroundFors =
|
||||
([&](Operation *op, SmallVector<Operation *, 4> &nests) {
|
||||
nests.push_back(op);
|
||||
auto currentOp = op;
|
||||
while (true) {
|
||||
if (auto parentOp = currentOp->getParentOfType<AffineForOp>()) {
|
||||
nests.push_back(parentOp);
|
||||
currentOp = parentOp;
|
||||
} else if (auto parentOp = currentOp->getParentOfType<AffineIfOp>())
|
||||
currentOp = parentOp;
|
||||
else
|
||||
break;
|
||||
}
|
||||
});
|
||||
|
||||
SmallVector<Operation *, 4> srcNests;
|
||||
SmallVector<Operation *, 4> dstNests;
|
||||
|
||||
getSurroundFors(srcOp, srcNests);
|
||||
getSurroundFors(dstOp, dstNests);
|
||||
|
||||
// If any parent of srcOp (or itself) and any parent of dstOp (or itself) are
|
||||
// at the same level, return the pointer.
|
||||
for (auto src : srcNests)
|
||||
for (auto dst : dstNests)
|
||||
if (checkSameLevel(src, dst))
|
||||
return dst;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Get the definition ArrayOp given any memory access operation.
|
||||
static ArrayOp getArrayOp(Operation *op) {
|
||||
auto defOp = MemRefAccess(op).memref.getDefiningOp();
|
||||
assert(defOp && "MemRef is block argument");
|
||||
|
||||
auto arrayOp = dyn_cast<ArrayOp>(defOp);
|
||||
assert(arrayOp && "MemRef is not defined by ArrayOp");
|
||||
|
||||
return arrayOp;
|
||||
}
|
||||
|
||||
/// Collect all load and store operations in the block.
|
||||
static void getLoadStoresMap(Block &block, LoadStoresMap &map) {
|
||||
for (auto &op : block) {
|
||||
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
|
||||
map[getArrayOp(&op)].push_back(&op);
|
||||
else if (op.getNumRegions()) {
|
||||
for (auto ®ion : op.getRegions())
|
||||
for (auto &block : region)
|
||||
getLoadStoresMap(block, map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HLSCppEstimator Class Definition
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Collect all dependencies detected in the function.
|
||||
void HLSCppEstimator::getFuncMemRefDepends() {
|
||||
// TODO: This can be simplified by traversing each ArrayOp in the function.
|
||||
|
@ -258,7 +179,7 @@ int32_t HLSCppEstimator::getPartitionIndex(Operation *op) {
|
|||
if (type == "cyclic")
|
||||
idxExpr = expr % builder.getAffineConstantExpr(factor);
|
||||
else if (type == "block") {
|
||||
auto size = arrayOp.getType().cast<ShapedType>().getShape()[dim];
|
||||
auto size = arrayOp.getShapedType().getShape()[dim];
|
||||
idxExpr = expr.floorDiv(
|
||||
builder.getAffineConstantExpr((size + factor - 1) / factor));
|
||||
}
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
//===------------------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Analysis/Utils.h"
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace scalehls;
|
||||
|
||||
// Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
|
||||
// AffineIfOp is transparent.
|
||||
bool scalehls::checkSameLevel(Operation *lhsOp, Operation *rhsOp) {
|
||||
// If lhsOp and rhsOp are already at the same level, return true.
|
||||
if (lhsOp->getBlock() == rhsOp->getBlock())
|
||||
return true;
|
||||
|
||||
// Helper to get all surrounding AffineIfOps.
|
||||
auto getSurroundIfs =
|
||||
([&](Operation *op, SmallVector<Operation *, 4> &nests) {
|
||||
nests.push_back(op);
|
||||
auto currentOp = op;
|
||||
while (true) {
|
||||
if (auto parentOp = currentOp->getParentOfType<AffineIfOp>()) {
|
||||
nests.push_back(parentOp);
|
||||
currentOp = parentOp;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
});
|
||||
|
||||
SmallVector<Operation *, 4> lhsNests;
|
||||
SmallVector<Operation *, 4> rhsNests;
|
||||
|
||||
getSurroundIfs(lhsOp, lhsNests);
|
||||
getSurroundIfs(rhsOp, rhsNests);
|
||||
|
||||
// If any parent of lhsOp and any parent of rhsOp are at the same level,
|
||||
// return true.
|
||||
for (auto lhs : lhsNests)
|
||||
for (auto rhs : rhsNests)
|
||||
if (lhs->getBlock() == rhs->getBlock())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the pointer of the scrOp's parent loop, which should locate at the same
|
||||
// level with dstOp's any parent loop.
|
||||
Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
|
||||
// If srcOp and dstOp are already at the same level, return the srcOp.
|
||||
if (checkSameLevel(srcOp, dstOp))
|
||||
return dstOp;
|
||||
|
||||
// Helper to get all surrouding AffineForOps. AffineIfOps are skipped.
|
||||
auto getSurroundFors =
|
||||
([&](Operation *op, SmallVector<Operation *, 4> &nests) {
|
||||
nests.push_back(op);
|
||||
auto currentOp = op;
|
||||
while (true) {
|
||||
if (auto parentOp = currentOp->getParentOfType<AffineForOp>()) {
|
||||
nests.push_back(parentOp);
|
||||
currentOp = parentOp;
|
||||
} else if (auto parentOp = currentOp->getParentOfType<AffineIfOp>())
|
||||
currentOp = parentOp;
|
||||
else
|
||||
break;
|
||||
}
|
||||
});
|
||||
|
||||
SmallVector<Operation *, 4> srcNests;
|
||||
SmallVector<Operation *, 4> dstNests;
|
||||
|
||||
getSurroundFors(srcOp, srcNests);
|
||||
getSurroundFors(dstOp, dstNests);
|
||||
|
||||
// If any parent of srcOp (or itself) and any parent of dstOp (or itself) are
|
||||
// at the same level, return the pointer.
|
||||
for (auto src : srcNests)
|
||||
for (auto dst : dstNests)
|
||||
if (checkSameLevel(src, dst))
|
||||
return dst;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Get the definition ArrayOp given any memory access operation.
|
||||
hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
|
||||
auto defOp = MemRefAccess(op).memref.getDefiningOp();
|
||||
assert(defOp && "MemRef is block argument");
|
||||
|
||||
auto arrayOp = dyn_cast<hlscpp::ArrayOp>(defOp);
|
||||
assert(arrayOp && "MemRef is not defined by ArrayOp");
|
||||
|
||||
return arrayOp;
|
||||
}
|
||||
|
||||
/// Collect all load and store operations in the block.
|
||||
void scalehls::getLoadStoresMap(Block &block, LoadStoresMap &map) {
|
||||
for (auto &op : block) {
|
||||
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
|
||||
map[getArrayOp(&op)].push_back(&op);
|
||||
else if (op.getNumRegions()) {
|
||||
for (auto ®ion : op.getRegions())
|
||||
for (auto &block : region)
|
||||
getLoadStoresMap(block, map);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -201,11 +201,6 @@ public:
|
|||
void emitAssign(AssignOp *op);
|
||||
void emitArray(ArrayOp *op);
|
||||
|
||||
/// Pragma operation emitters.
|
||||
void emitLoopPragma(LoopPragmaOp *op);
|
||||
void emitFuncPragma(FuncPragmaOp *op);
|
||||
void emitArrayPragma(ArrayPragmaOp *op);
|
||||
|
||||
/// Top-level MLIR module emitter.
|
||||
void emitModule(ModuleOp module);
|
||||
|
||||
|
@ -501,11 +496,6 @@ public:
|
|||
bool visitOp(ArrayOp op) { return emitter.emitArray(&op), true; }
|
||||
bool visitOp(EndOp op) { return true; }
|
||||
|
||||
/// Pragma operations.
|
||||
bool visitOp(LoopPragmaOp op) { return emitter.emitLoopPragma(&op), true; }
|
||||
bool visitOp(FuncPragmaOp op) { return emitter.emitFuncPragma(&op), true; }
|
||||
bool visitOp(ArrayPragmaOp op) { return emitter.emitArrayPragma(&op), true; }
|
||||
|
||||
private:
|
||||
ModuleEmitter &emitter;
|
||||
};
|
||||
|
@ -1287,80 +1277,6 @@ void ModuleEmitter::emitArray(ArrayOp *op) {
|
|||
os << "\n";
|
||||
}
|
||||
|
||||
/// Pragma operation emitters. (deprecated)
|
||||
/// Emit the loop pragmas described by `op`.
///
/// Always writes a "#pragma HLS pipeline" line: with " II=<pipeline_II>" when
/// pipelining is enabled, with " off" otherwise. When unrolling is enabled a
/// separate "#pragma HLS unroll" line follows. The output ends with an empty
/// line.
void ModuleEmitter::emitLoopPragma(LoopPragmaOp *op) {
  indent();
  os << "#pragma HLS pipeline";
  if (op->pipeline())
    // Terminate the line here as well; otherwise a following unroll pragma
    // would be appended to the same line as the pipeline pragma.
    os << " II=" << op->pipeline_II() << "\n";
  else
    os << " off\n";

  if (op->unroll()) {
    indent();
    os << "#pragma HLS unroll\n";
  }

  // An empty line.
  os << "\n";
}
|
||||
|
||||
/// Emit the function pragmas described by `op`.
///
/// Writes "#pragma HLS dataflow" followed by an empty line when the dataflow
/// attribute is set; writes nothing otherwise.
void ModuleEmitter::emitFuncPragma(FuncPragmaOp *op) {
  // Nothing to emit unless dataflow is requested.
  if (!op->dataflow())
    return;

  indent();
  os << "#pragma HLS dataflow\n";

  // An empty line.
  os << "\n";
}
|
||||
|
||||
/// Emit the array pragmas described by `op` for its operand.
///
/// First writes either an interface pragma (when the interface attribute is
/// set) or a bind_storage pragma. Then, when partitioning is enabled and the
/// operand type has a static shape, writes one array_partition pragma per
/// dimension. The output ends with an empty line.
void ModuleEmitter::emitArrayPragma(ArrayPragmaOp *op) {
  if (!op->interface()) {
    // Emit bind_storage pragma.
    indent();
    os << "#pragma HLS bind_storage" << " variable=";
    emitValue(op->getOperand());
    os << " type=" << op->storage_type() << " impl=" << op->storage_impl()
       << "\n";
  } else {
    // Emit interface pragma.
    indent();
    os << "#pragma HLS interface" << " " << op->interface_mode() << " port=";
    emitValue(op->getOperand());
    if (op->interface_mode() == "m_axi")
      os << " depth=" << op->interface_depth() << " offset=slave\n";
    else
      os << " storage_type=" << op->storage_type() << "\n";
  }

  auto type = op->getOperand().getType().cast<ShapedType>();
  if (op->partition() && type.hasStaticShape()) {
    // Emit array_partition pragma(s), one per dimension. The emitted `dim`
    // index is one-based.
    for (unsigned dim = 0; dim < type.getRank(); ++dim) {
      indent();
      os << "#pragma HLS array_partition" << " variable=";
      emitValue(op->getOperand());

      auto partitionType =
          op->partition_type()[dim].cast<StringAttr>().getValue();
      os << " " << partitionType;
      // A factor is only written for partition types other than "complete".
      if (partitionType != "complete")
        os << " factor="
           << op->partition_factor()[dim].cast<IntegerAttr>().getUInt();
      os << " dim=" << dim + 1 << "\n";
    }
  }

  // An empty line.
  os << "\n";
}
|
||||
|
||||
/// C++ component emitters.
|
||||
void ModuleEmitter::emitValue(Value val, unsigned rank, bool isPtr) {
|
||||
assert(!(rank && isPtr) && "should be either an array or a pointer.");
|
||||
|
|
|
@ -35,7 +35,7 @@ template <typename OpType>
|
|||
static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) {
|
||||
for (auto pair : map) {
|
||||
auto arrayOp = cast<ArrayOp>(pair.first);
|
||||
auto arrayType = arrayOp.getType().cast<MemRefType>();
|
||||
auto arrayShape = arrayOp.getShapedType().getShape();
|
||||
auto arrayAccesses = pair.second;
|
||||
|
||||
// Walk through each dimension of the targeted array.
|
||||
|
@ -43,7 +43,7 @@ static void applyArrayPartition(LoadStoresMap &map, OpBuilder &builder) {
|
|||
SmallVector<StringRef, 4> partitionType;
|
||||
unsigned partitionNum = 1;
|
||||
|
||||
for (size_t dim = 0, e = arrayType.getShape().size(); dim < e; ++dim) {
|
||||
for (size_t dim = 0, e = arrayShape.size(); dim < e; ++dim) {
|
||||
// Collect all array access indices of the current dimension.
|
||||
SmallVector<AffineExpr, 4> indices;
|
||||
for (auto accessOp : arrayAccesses) {
|
||||
|
|
|
@ -0,0 +1,212 @@
|
|||
//===------------------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Analysis/Utils.h"
|
||||
#include "Transforms/Passes.h"
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
#include "mlir/Analysis/Utils.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/IR/Dominance.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace mlir;
|
||||
using namespace scalehls;
|
||||
|
||||
namespace {
|
||||
// The store to load forwarding relies on three conditions:
|
||||
//
|
||||
// 1) they need to have mathematically equivalent affine access functions
|
||||
// (checked after full composition of load/store operands); this implies that
|
||||
// they access the same single memref element for all iterations of the common
|
||||
// surrounding loop,
|
||||
//
|
||||
// 2) the store op should dominate the load op,
|
||||
//
|
||||
// 3) among all op's that satisfy both (1) and (2), the one that postdominates
|
||||
// all store op's that have a dependence into the load, is provably the last
|
||||
// writer to the particular memref location being loaded at the load op, and its
|
||||
// store value can be forwarded to the load. Note that the only dependences
|
||||
// that are to be considered are those that are satisfied at the block* of the
|
||||
// innermost common surrounding loop of the <store, load> being considered.
|
||||
//
|
||||
// (* A dependence being satisfied at a block: a dependence that is satisfied by
|
||||
// virtue of the destination operation appearing textually / lexically after
|
||||
// the source operation within the body of a 'affine.for' operation; thus, a
|
||||
// dependence is always either satisfied by a loop or by a block).
|
||||
//
|
||||
// The above conditions are simple to check, sufficient, and powerful for most
|
||||
// cases in practice - they are sufficient, but not necessary --- since they
|
||||
// don't reason about loops that are guaranteed to execute at least once or
|
||||
// multiple sources to forward from.
|
||||
//
|
||||
// TODO: more forwarding can be done when support for
|
||||
// loop/conditional live-out SSA values is available.
|
||||
// TODO: do general dead store elimination for memref's. This pass
|
||||
// currently only eliminates the stores only if no other loads/uses (other
|
||||
// than dealloc) remain.
|
||||
//
|
||||
/// Pass object for store forwarding, built on the generated StoreForwardBase.
struct StoreForward : StoreForwardBase<StoreForward> {
  /// Pass entry point; defined out of line.
  void runOnOperation() override;

  /// Try to forward a store to `loadOp`; defined out of line.
  void forwardStoreToLoad(AffineReadOpInterface loadOp);

  // Memrefs that are potentially dead and could be eliminated.
  SmallPtrSet<Value, 4> memrefsToErase;

  // Load operations whose results were replaced by values forwarded from
  // stores.
  SmallVector<Operation *, 8> loadOpsToErase;

  // Dominance analyses, null until assigned.
  // NOTE(review): presumably set up in runOnOperation() - confirm there.
  DominanceInfo *domInfo{nullptr};
  PostDominanceInfo *postDomInfo{nullptr};
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Creates a pass to perform optimizations relying on memref dataflow such as
|
||||
/// store to load forwarding, elimination of dead stores, and dead allocs.
|
||||
std::unique_ptr<Pass> scalehls::createStoreForwardPass() {
|
||||
return std::make_unique<StoreForward>();
|
||||
}
|
||||
|
||||
// This is a straightforward implementation not optimized for speed. Optimize
|
||||
// if needed.
|
||||
void StoreForward::forwardStoreToLoad(AffineReadOpInterface loadOp) {
|
||||
// First pass over the use list to get the minimum number of surrounding
|
||||
// loops common between the load op and the store op, with min taken across
|
||||
// all store ops.
|
||||
SmallVector<Operation *, 8> storeOps;
|
||||
unsigned minSurroundingLoops = getNestingDepth(loadOp);
|
||||
for (auto *user : loadOp.getMemRef().getUsers()) {
|
||||
auto storeOp = dyn_cast<AffineWriteOpInterface>(user);
|
||||
if (!storeOp)
|
||||
continue;
|
||||
unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
|
||||
minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
|
||||
storeOps.push_back(storeOp);
|
||||
}
|
||||
|
||||
// The list of store op candidates for forwarding that satisfy conditions
|
||||
// (1) and (2) above - they will be filtered later when checking (3).
|
||||
SmallVector<Operation *, 8> fwdingCandidates;
|
||||
|
||||
// Store ops that have a dependence into the load (even if they aren't
|
||||
// forwarding candidates). Each forwarding candidate will be checked for a
|
||||
// post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
|
||||
SmallVector<Operation *, 8> depSrcStores;
|
||||
|
||||
for (auto *storeOp : storeOps) {
|
||||
MemRefAccess srcAccess(storeOp);
|
||||
MemRefAccess destAccess(loadOp);
|
||||
// Find stores that may be reaching the load.
|
||||
FlatAffineConstraints dependenceConstraints;
|
||||
unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp);
|
||||
unsigned d;
|
||||
// Dependences at loop depth <= minSurroundingLoops do NOT matter.
|
||||
for (d = nsLoops + 1; d > minSurroundingLoops; d--) {
|
||||
DependenceResult result = checkMemrefAccessDependence(
|
||||
srcAccess, destAccess, d, &dependenceConstraints,
|
||||
/*dependenceComponents=*/nullptr);
|
||||
if (hasDependence(result))
|
||||
break;
|
||||
}
|
||||
if (d == minSurroundingLoops)
|
||||
continue;
|
||||
|
||||
// Stores that *may* be reaching the load.
|
||||
depSrcStores.push_back(storeOp);
|
||||
|
||||
// 1. Check if the store and the load have mathematically equivalent
|
||||
// affine access functions; this implies that they statically refer to the
|
||||
// same single memref element. As an example this filters out cases like:
|
||||
// store %A[%i0 + 1]
|
||||
// load %A[%i0]
|
||||
// store %A[%M]
|
||||
// load %A[%N]
|
||||
// Use the AffineValueMap difference based memref access equality checking.
|
||||
if (srcAccess != destAccess)
|
||||
continue;
|
||||
|
||||
// 2. The store has to dominate the load op to be candidate.
|
||||
if (!domInfo->dominates(storeOp, loadOp)) {
|
||||
llvm::outs() << *loadOp.getOperation() << "\n";
|
||||
llvm::outs() << *storeOp << "\n";
|
||||
llvm::outs() << "does not dominate\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
// We now have a candidate for forwarding.
|
||||
fwdingCandidates.push_back(storeOp);
|
||||
}
|
||||
|
||||
// 3. Of all the store op's that meet the above criteria, the store that
|
||||
// postdominates all 'depSrcStores' (if one exists) is the unique store
|
||||
// providing the value to the load, i.e., provably the last writer to that
|
||||
// memref loc.
|
||||
// Note: this can be implemented in a cleaner way with postdominator tree
|
||||
// traversals. Consider this for the future if needed.
|
||||
Operation *lastWriteStoreOp = nullptr;
|
||||
for (auto *storeOp : fwdingCandidates) {
|
||||
if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
|
||||
return postDomInfo->postDominates(storeOp, depStore);
|
||||
})) {
|
||||
lastWriteStoreOp = storeOp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!lastWriteStoreOp)
|
||||
return;
|
||||
|
||||
// Perform the actual store to load forwarding.
|
||||
Value storeVal =
|
||||
cast<AffineWriteOpInterface>(lastWriteStoreOp).getValueToStore();
|
||||
loadOp.getValue().replaceAllUsesWith(storeVal);
|
||||
// Record the memref for a later sweep to optimize away.
|
||||
memrefsToErase.insert(loadOp.getMemRef());
|
||||
// Record this to erase later.
|
||||
loadOpsToErase.push_back(loadOp);
|
||||
}
|
||||
|
||||
void StoreForward::runOnOperation() {
|
||||
// Only supports single block functions at the moment.
|
||||
FuncOp f = getOperation();
|
||||
if (!llvm::hasSingleElement(f)) {
|
||||
markAllAnalysesPreserved();
|
||||
return;
|
||||
}
|
||||
|
||||
domInfo = &getAnalysis<DominanceInfo>();
|
||||
postDomInfo = &getAnalysis<PostDominanceInfo>();
|
||||
|
||||
loadOpsToErase.clear();
|
||||
memrefsToErase.clear();
|
||||
|
||||
// Walk all load's and perform store to load forwarding.
|
||||
f.walk([&](AffineReadOpInterface loadOp) { forwardStoreToLoad(loadOp); });
|
||||
|
||||
// Erase all load op's whose results were replaced with store fwd'ed ones.
|
||||
for (auto *loadOp : loadOpsToErase)
|
||||
loadOp->erase();
|
||||
|
||||
// Check if the store fwd'ed memrefs are now left with only stores and can
|
||||
// thus be completely deleted. Note: the canonicalize pass should be able
|
||||
// to do this as well, but we'll do it here since we collected these anyway.
|
||||
for (auto memref : memrefsToErase) {
|
||||
// If the memref hasn't been alloc'ed in this function, skip.
|
||||
Operation *defOp = memref.getDefiningOp();
|
||||
if (!defOp || !isa<AllocOp>(defOp))
|
||||
// TODO: if the memref was returned by a 'call' operation, we
|
||||
// could still erase it if the call had no side-effects.
|
||||
continue;
|
||||
if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
|
||||
return !isa<AffineWriteOpInterface, DeallocOp>(ownerOp);
|
||||
}))
|
||||
continue;
|
||||
|
||||
// Erase all stores, the dealloc, and the alloc on the memref.
|
||||
for (auto *user : llvm::make_early_inc_range(memref.getUsers()))
|
||||
user->erase();
|
||||
defOp->erase();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue