[ArrayPartition] Implement this pass. Note that, because the AffineExpr simplification mechanism in MLIR is limited, this pass cannot yet handle overly complicated cases.
This commit is contained in:
parent
03b2695fe7
commit
f9bf38240e
|
@ -21,7 +21,8 @@ def PositiveUI32ArrayAttr : TypedArrayAttrBase<PositiveUI32Attr, ""> {}
|
||||||
def PartitionTypeAttr : StrEnumAttr<"PartitionType", "", [
|
def PartitionTypeAttr : StrEnumAttr<"PartitionType", "", [
|
||||||
StrEnumAttrCase<"cyclic", 0>,
|
StrEnumAttrCase<"cyclic", 0>,
|
||||||
StrEnumAttrCase<"block", 1>,
|
StrEnumAttrCase<"block", 1>,
|
||||||
StrEnumAttrCase<"complete", 2>
|
StrEnumAttrCase<"complete", 2>,
|
||||||
|
StrEnumAttrCase<"none", 3>
|
||||||
]> {
|
]> {
|
||||||
let cppNamespace = "::mlir::scalehls::hlscpp";
|
let cppNamespace = "::mlir::scalehls::hlscpp";
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,11 +33,13 @@ bool HLSCppAnalyzer::visitOp(AffineForOp op) {
|
||||||
// If the current loop is annotated as unroll, all inner loops and itself are
|
// If the current loop is annotated as unroll, all inner loops and itself are
|
||||||
// automatically unrolled.
|
// automatically unrolled.
|
||||||
if (getBoolAttrValue(op, "unroll")) {
|
if (getBoolAttrValue(op, "unroll")) {
|
||||||
op.emitRemark("this loop and all inner loops are automatically unrolled.");
|
|
||||||
op.walk([&](AffineForOp forOp) {
|
op.walk([&](AffineForOp forOp) {
|
||||||
if (forOp.getLoopBody().getBlocks().size() != 1)
|
if (forOp.getLoopBody().getBlocks().size() != 1)
|
||||||
op.emitError("has zero or more than one basic blocks.");
|
op.emitError("has zero or more than one basic blocks.");
|
||||||
loopUnrollFull(forOp);
|
if (failed(loopUnrollFull(forOp))) {
|
||||||
|
forOp.emitError("failed to be fully unrolled.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -45,12 +47,14 @@ bool HLSCppAnalyzer::visitOp(AffineForOp op) {
|
||||||
// If the current loop is annotated as pipeline, all inner loops are
|
// If the current loop is annotated as pipeline, all inner loops are
|
||||||
// automatically unrolled.
|
// automatically unrolled.
|
||||||
if (getBoolAttrValue(op, "pipeline")) {
|
if (getBoolAttrValue(op, "pipeline")) {
|
||||||
op.emitRemark("all inner loops are automatically unrolled.");
|
|
||||||
op.walk([&](AffineForOp forOp) {
|
op.walk([&](AffineForOp forOp) {
|
||||||
if (forOp != op) {
|
if (forOp != op) {
|
||||||
if (forOp.getLoopBody().getBlocks().size() != 1)
|
if (forOp.getLoopBody().getBlocks().size() != 1)
|
||||||
op.emitError("has zero or more than one basic blocks.");
|
op.emitError("has zero or more than one basic blocks.");
|
||||||
loopUnrollFull(forOp);
|
if (failed(loopUnrollFull(forOp))) {
|
||||||
|
forOp.emitError("failed to be fully unrolled.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -129,6 +133,7 @@ HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath,
|
||||||
string opLatencyPath)
|
string opLatencyPath)
|
||||||
: HLSCppToolBase(builder) {
|
: HLSCppToolBase(builder) {
|
||||||
|
|
||||||
|
/*
|
||||||
INIReader targetSpec(targetSpecPath);
|
INIReader targetSpec(targetSpecPath);
|
||||||
if (targetSpec.ParseError())
|
if (targetSpec.ParseError())
|
||||||
llvm::outs() << "error: target spec file parse fail, please refer to "
|
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||||
|
@ -143,6 +148,7 @@ HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath,
|
||||||
auto freq = targetSpec.Get("spec", "frequency", "200MHz");
|
auto freq = targetSpec.Get("spec", "frequency", "200MHz");
|
||||||
auto latency = opLatency.GetInteger(freq, "op", 0);
|
auto latency = opLatency.GetInteger(freq, "op", 0);
|
||||||
llvm::outs() << latency << "\n";
|
llvm::outs() << latency << "\n";
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the partition index according to the affine map of a memory access
|
/// Calculate the partition index according to the affine map of a memory access
|
||||||
|
@ -467,8 +473,6 @@ bool HLSCppEstimator::visitOp(AffineForOp op) {
|
||||||
// equal to zero. So that in this case, this loop will be flattened into
|
// equal to zero. So that in this case, this loop will be flattened into
|
||||||
// the inner pipelined loop.
|
// the inner pipelined loop.
|
||||||
if (auto II = getUIntAttrValue(child, "init_interval")) {
|
if (auto II = getUIntAttrValue(child, "init_interval")) {
|
||||||
op.emitRemark("this loop is flattened into its inner loop.");
|
|
||||||
|
|
||||||
setAttrValue(op, "init_interval", II);
|
setAttrValue(op, "init_interval", II);
|
||||||
|
|
||||||
auto iterLatency = getUIntAttrValue(child, "iter_latency");
|
auto iterLatency = getUIntAttrValue(child, "iter_latency");
|
||||||
|
|
|
@ -18,59 +18,6 @@ public:
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
static void convertBlock(Block &block) {
|
|
||||||
for (auto &op : block) {
|
|
||||||
if (isa<ArrayOp>(op))
|
|
||||||
continue;
|
|
||||||
auto builder = OpBuilder(&op);
|
|
||||||
|
|
||||||
// ArrayOp will be inserted after each ShapedType value from declaration
|
|
||||||
// or function signature.
|
|
||||||
for (auto operand : op.getOperands()) {
|
|
||||||
if (auto arrayType = operand.getType().dyn_cast<ShapedType>()) {
|
|
||||||
bool insertArrayOp = false;
|
|
||||||
if (operand.getKind() == Value::Kind::BlockArgument)
|
|
||||||
insertArrayOp = true;
|
|
||||||
else if (!isa<ArrayOp>(operand.getDefiningOp()) &&
|
|
||||||
!isa<AssignOp>(operand.getDefiningOp())) {
|
|
||||||
insertArrayOp = true;
|
|
||||||
if (!arrayType.hasStaticShape())
|
|
||||||
operand.getDefiningOp()->emitError(
|
|
||||||
"is unranked or has dynamic shape which is illegal.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (insertArrayOp) {
|
|
||||||
// Insert array operation and set attributes.
|
|
||||||
builder.setInsertionPointAfterValue(operand);
|
|
||||||
auto arrayOp =
|
|
||||||
builder.create<ArrayOp>(op.getLoc(), operand.getType(), operand);
|
|
||||||
operand.replaceAllUsesExcept(arrayOp.getResult(),
|
|
||||||
SmallPtrSet<Operation *, 1>{arrayOp});
|
|
||||||
|
|
||||||
// Set array pragma attributes, default array instance is ram_1p
|
|
||||||
// bram. Other attributes are not set here since they requires more
|
|
||||||
// analysis to be determined.
|
|
||||||
arrayOp.setAttr("interface", builder.getBoolAttr(false));
|
|
||||||
arrayOp.setAttr("storage", builder.getBoolAttr(false));
|
|
||||||
arrayOp.setAttr("partition", builder.getBoolAttr(false));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (auto forOp = dyn_cast<AffineForOp>(op)) {
|
|
||||||
if (forOp.getLoopBody().getBlocks().size() != 1)
|
|
||||||
forOp.emitError("has zero or more than one basic blocks");
|
|
||||||
|
|
||||||
// Set loop pragma attributes.
|
|
||||||
forOp.setAttr("pipeline", builder.getBoolAttr(false));
|
|
||||||
forOp.setAttr("unroll", builder.getBoolAttr(false));
|
|
||||||
forOp.setAttr("flatten", builder.getBoolAttr(false));
|
|
||||||
|
|
||||||
convertBlock(forOp.getLoopBody().front());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ConvertToHLSCpp::runOnOperation() {
|
void ConvertToHLSCpp::runOnOperation() {
|
||||||
for (auto func : getOperation().getOps<FuncOp>()) {
|
for (auto func : getOperation().getOps<FuncOp>()) {
|
||||||
auto b = OpBuilder(func);
|
auto b = OpBuilder(func);
|
||||||
|
@ -101,7 +48,55 @@ void ConvertToHLSCpp::runOnOperation() {
|
||||||
func.emitError("doesn't have a return as terminator.");
|
func.emitError("doesn't have a return as terminator.");
|
||||||
|
|
||||||
// Recursively convert every for loop body blocks.
|
// Recursively convert every for loop body blocks.
|
||||||
convertBlock(func.front());
|
func.walk([&](Operation *op) {
|
||||||
|
auto builder = OpBuilder(op);
|
||||||
|
|
||||||
|
// ArrayOp will be inserted after each ShapedType value from declaration
|
||||||
|
// or function signature.
|
||||||
|
for (auto operand : op->getOperands()) {
|
||||||
|
if (auto arrayType = operand.getType().dyn_cast<ShapedType>()) {
|
||||||
|
bool insertArrayOp = false;
|
||||||
|
if (operand.getKind() == Value::Kind::BlockArgument)
|
||||||
|
insertArrayOp = true;
|
||||||
|
else if (!isa<ArrayOp>(operand.getDefiningOp()) &&
|
||||||
|
!isa<AssignOp>(operand.getDefiningOp())) {
|
||||||
|
insertArrayOp = true;
|
||||||
|
if (!arrayType.hasStaticShape())
|
||||||
|
operand.getDefiningOp()->emitError(
|
||||||
|
"is unranked or has dynamic shape which is illegal.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isa<ArrayOp>(op))
|
||||||
|
insertArrayOp = false;
|
||||||
|
|
||||||
|
if (insertArrayOp) {
|
||||||
|
// Insert array operation and set attributes.
|
||||||
|
builder.setInsertionPointAfterValue(operand);
|
||||||
|
auto arrayOp = builder.create<ArrayOp>(op->getLoc(),
|
||||||
|
operand.getType(), operand);
|
||||||
|
operand.replaceAllUsesExcept(arrayOp.getResult(),
|
||||||
|
SmallPtrSet<Operation *, 1>{arrayOp});
|
||||||
|
|
||||||
|
// Set array pragma attributes, default array instance is ram_1p
|
||||||
|
// bram. Other attributes are not set here since they require more
|
||||||
|
// analysis to be determined.
|
||||||
|
arrayOp.setAttr("interface", builder.getBoolAttr(false));
|
||||||
|
arrayOp.setAttr("storage", builder.getBoolAttr(false));
|
||||||
|
arrayOp.setAttr("partition", builder.getBoolAttr(false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto forOp = dyn_cast<AffineForOp>(op)) {
|
||||||
|
if (forOp.getLoopBody().getBlocks().size() != 1)
|
||||||
|
forOp.emitError("has zero or more than one basic blocks");
|
||||||
|
|
||||||
|
// Set loop pragma attributes.
|
||||||
|
forOp.setAttr("pipeline", builder.getBoolAttr(false));
|
||||||
|
forOp.setAttr("unroll", builder.getBoolAttr(false));
|
||||||
|
forOp.setAttr("flatten", builder.getBoolAttr(false));
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,10 +2,12 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "Analysis/QoREstimation.h"
|
||||||
#include "Transforms/Passes.h"
|
#include "Transforms/Passes.h"
|
||||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||||
#include "mlir/Dialect/Affine/Passes.h"
|
#include "mlir/Dialect/Affine/Passes.h"
|
||||||
#include "mlir/IR/Builders.h"
|
#include "mlir/IR/Builders.h"
|
||||||
|
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
||||||
#include "mlir/Transforms/LoopUtils.h"
|
#include "mlir/Transforms/LoopUtils.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
@ -18,7 +20,130 @@ struct ArrayPartition : public ArrayPartitionBase<ArrayPartition> {
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void ArrayPartition::runOnOperation() { return; }
|
void ArrayPartition::runOnOperation() {
|
||||||
|
auto module = getOperation();
|
||||||
|
auto builder = OpBuilder(module);
|
||||||
|
|
||||||
|
// Extract all static parameters and current pragma configurations.
|
||||||
|
HLSCppAnalyzer analyzer(builder);
|
||||||
|
analyzer.analyzeModule(getOperation());
|
||||||
|
|
||||||
|
// Canonicalize the analyzed IR.
|
||||||
|
OwningRewritePatternList patterns;
|
||||||
|
|
||||||
|
auto *context = &getContext();
|
||||||
|
for (auto *op : context->getRegisteredOperations())
|
||||||
|
op->getCanonicalizationPatterns(patterns, context);
|
||||||
|
|
||||||
|
Operation *op = getOperation();
|
||||||
|
applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns));
|
||||||
|
|
||||||
|
// Estimate performance and resource utilization.
|
||||||
|
for (auto func : module.getOps<FuncOp>()) {
|
||||||
|
for (auto forOp : func.getOps<mlir::AffineForOp>()) {
|
||||||
|
// TODO: support imperfect loop nests.
|
||||||
|
SmallVector<mlir::AffineForOp, 4> nestedLoops;
|
||||||
|
getPerfectlyNestedLoops(nestedLoops, forOp);
|
||||||
|
auto innermost = nestedLoops.back();
|
||||||
|
|
||||||
|
// Collect memory access information.
|
||||||
|
MemAccessDict loadDict;
|
||||||
|
innermost.walk([&](mlir::AffineLoadOp loadOp) {
|
||||||
|
auto arrayOp = cast<ArrayOp>(loadOp.getMemRef().getDefiningOp());
|
||||||
|
loadDict[arrayOp].push_back(loadOp);
|
||||||
|
});
|
||||||
|
|
||||||
|
MemAccessDict storeDict;
|
||||||
|
innermost.walk([&](mlir::AffineStoreOp storeOp) {
|
||||||
|
auto arrayOp = cast<ArrayOp>(storeOp.getMemRef().getDefiningOp());
|
||||||
|
storeDict[arrayOp].push_back(storeOp);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Apply array partition pragma.
|
||||||
|
for (auto pair : loadDict) {
|
||||||
|
auto arrayOp = cast<ArrayOp>(pair.first);
|
||||||
|
auto arrayType = arrayOp.getType().cast<MemRefType>();
|
||||||
|
auto arrayAccesses = pair.second;
|
||||||
|
|
||||||
|
// Walk through each dimension of the targeted array.
|
||||||
|
SmallVector<Attribute, 4> partitionFactor;
|
||||||
|
SmallVector<StringRef, 4> partitionType;
|
||||||
|
|
||||||
|
for (size_t dim = 0, e = arrayType.getShape().size(); dim < e; ++dim) {
|
||||||
|
unsigned dimSize = arrayType.getShape()[dim];
|
||||||
|
|
||||||
|
// Collect all array access indices of the current dimension.
|
||||||
|
SmallVector<AffineExpr, 4> indices;
|
||||||
|
for (auto accessOp : arrayAccesses) {
|
||||||
|
auto concreteOp = cast<mlir::AffineLoadOp>(accessOp);
|
||||||
|
auto index = concreteOp.getAffineMap().getResult(dim);
|
||||||
|
// Only add unique index.
|
||||||
|
if (std::find(indices.begin(), indices.end(), index) ==
|
||||||
|
indices.end())
|
||||||
|
indices.push_back(index);
|
||||||
|
}
|
||||||
|
auto accessNum = indices.size();
|
||||||
|
|
||||||
|
// Find the max array access distance in the current block.
|
||||||
|
unsigned maxDistance = 0;
|
||||||
|
bool failFlag = false;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < accessNum; ++i) {
|
||||||
|
for (unsigned j = i + 1; j < accessNum; ++j) {
|
||||||
|
// TODO: this expression can't be simplified.
|
||||||
|
auto expr = indices[j] - indices[i];
|
||||||
|
|
||||||
|
if (auto constDistance = expr.dyn_cast<AffineConstantExpr>()) {
|
||||||
|
unsigned distance = abs(constDistance.getValue());
|
||||||
|
maxDistance = max(maxDistance, distance);
|
||||||
|
} else {
|
||||||
|
// The array partition mechanism will fail if the distance is
|
||||||
|
// not a constant number.
|
||||||
|
// failFlag = true;
|
||||||
|
// break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if (failFlag)
|
||||||
|
// break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine array partition strategy.
|
||||||
|
maxDistance += 1;
|
||||||
|
if (failFlag || maxDistance == 1) {
|
||||||
|
// This means all accesses have the same index, and this dimension
|
||||||
|
// should not be partitioned.
|
||||||
|
partitionType.push_back("none");
|
||||||
|
partitionFactor.push_back(builder.getUI32IntegerAttr(1));
|
||||||
|
|
||||||
|
} else if (accessNum == dimSize) {
|
||||||
|
// Apply complete array partition.
|
||||||
|
partitionType.push_back("complete");
|
||||||
|
partitionFactor.push_back(builder.getUI32IntegerAttr(1));
|
||||||
|
|
||||||
|
} else if (accessNum >= maxDistance) {
|
||||||
|
// This means some elements are accessed more than once or exactly
|
||||||
|
// once, and successive elements are accessed. In most cases, apply
|
||||||
|
// "cyclic" partition should be the best solution.
|
||||||
|
partitionType.push_back("cyclic");
|
||||||
|
partitionFactor.push_back(builder.getUI32IntegerAttr(maxDistance));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// This means discrete elements are accessed. Typically, "block"
|
||||||
|
// partition will be most benefit for this occasion.
|
||||||
|
partitionType.push_back("block");
|
||||||
|
partitionFactor.push_back(builder.getUI32IntegerAttr(accessNum));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
arrayOp.setAttr("partition", builder.getBoolAttr(true));
|
||||||
|
arrayOp.setAttr("partition_type",
|
||||||
|
builder.getStrArrayAttr(partitionType));
|
||||||
|
arrayOp.setAttr("partition_factor",
|
||||||
|
builder.getArrayAttr(partitionFactor));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_ptr<mlir::Pass> scalehls::createArrayPartitionPass() {
|
std::unique_ptr<mlir::Pass> scalehls::createArrayPartitionPass() {
|
||||||
return std::make_unique<ArrayPartition>();
|
return std::make_unique<ArrayPartition>();
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
|
|
||||||
// CHECK-LABEL: func @test_for
|
// CHECK-LABEL: func @test_for
|
||||||
func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) attributes {dataflow = false} {
|
func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) attributes {dataflow = false} {
|
||||||
%array0 = "hlscpp.array"(%arg0) {interface = true, storage = false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
|
%array0 = "hlscpp.array"(%arg0) {interface=true, storage=false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
|
||||||
%array1 = "hlscpp.array"(%arg1) {interface = true, storage = false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
|
%array1 = "hlscpp.array"(%arg1) {interface=true, storage=false, partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[1 : ui32, 1 : ui32, 4 : ui32], storage_type="ram_2p"} : (memref<16x4x4xindex>) -> memref<16x4x4xindex>
|
||||||
affine.for %i = 0 to 16 {
|
affine.for %i = 0 to 16 {
|
||||||
affine.for %j = 0 to 4 {
|
affine.for %j = 0 to 4 {
|
||||||
affine.for %k = 0 to 4 {
|
affine.for %k = 0 to 4 {
|
||||||
|
|
Loading…
Reference in New Issue