[ArrayPartition] support multi-loops array partition; factor out applyArrayPartition() method (#20)
This commit is contained in:
parent
a01b440a95
commit
07d77f7193
|
@ -5,28 +5,6 @@
|
|||
#ifndef SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD
|
||||
#define SCALEHLS_DIALECT_HLSCPP_ATTRIBUTES_TD
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Customized i64 Attributes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def PositiveI64Attr : Confined<I64Attr, [IntPositive]> {}
|
||||
def PositiveI64ArrayAttr : TypedArrayAttrBase<PositiveI64Attr, ""> {}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pragma array_partition Attributes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def PartitionTypeAttr : StrEnumAttr<"PartitionType", "", [
|
||||
StrEnumAttrCase<"cyclic", 0>,
|
||||
StrEnumAttrCase<"block", 1>,
|
||||
StrEnumAttrCase<"complete", 2>,
|
||||
StrEnumAttrCase<"none", 3>
|
||||
]> {
|
||||
let cppNamespace = "::mlir::scalehls::hlscpp";
|
||||
}
|
||||
|
||||
def PartitionTypeArrayAttr : TypedArrayAttrBase<PartitionTypeAttr, ""> {}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pragma Interface Attributes (for array ports)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -25,9 +25,11 @@ enum class MemoryKind {
|
|||
// URAM_S2P = 4,
|
||||
// URAM_T2P = 5,
|
||||
|
||||
DRAM = 3,
|
||||
DRAM = 3
|
||||
};
|
||||
|
||||
// Partition strategy recorded for a single memref dimension. NONE is used as
// the initial "no partition" state (paired with a factor of 1) by the array
// partition transform.
enum class PartitionKind { CYCLIC = 0, BLOCK = 1, NONE = 2 };
|
||||
|
||||
} // namespace hlscpp
|
||||
} // namespace scalehls
|
||||
} // namespace mlir
|
||||
|
|
|
@ -18,43 +18,6 @@ def AssignOp : HLSCppOp<"assign", [SameOperandsAndResultType]> {
|
|||
let results = (outs AnyType : $output);
|
||||
}
|
||||
|
||||
// Deprecated. Will be removed in the future.
|
||||
def ArrayOp : HLSCppOp<"array", [SameOperandsAndResultType]> {
|
||||
let summary = "A C++ array instance";
|
||||
let description = [{
|
||||
This hlscpp.array operation represents an array in C++. All shaped type values
|
||||
(e.g., memref, tensor, and vector) should be passed through this operation
|
||||
after declared by an allocation (e.g., Alloc, etc.) operation or in the
|
||||
signature of a function. This will help the compiler to easily manage the
|
||||
attributes and statistics of arrays.
|
||||
}];
|
||||
|
||||
let arguments = (ins Type<IsShapedTypePred> : $input,
|
||||
|
||||
// Interface-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $interface,
|
||||
DefaultValuedAttr<InterfaceModeAttr, "bram"> : $interface_mode,
|
||||
|
||||
// BindStorage-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $storage,
|
||||
DefaultValuedAttr<StorageTypeAttr, "ram_1p_bram"> : $storage_type,
|
||||
|
||||
// ArrayPartition-related attributes.
|
||||
DefaultValuedAttr<BoolAttr, "false"> : $partition,
|
||||
DefaultValuedAttr<PositiveI64Attr, "1"> : $partition_num,
|
||||
DefaultValuedAttr<PartitionTypeArrayAttr, "{}"> : $partition_type,
|
||||
DefaultValuedAttr<PositiveI64ArrayAttr, "{}"> : $partition_factor
|
||||
);
|
||||
|
||||
let results = (outs Type<IsShapedTypePred> : $output);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
ShapedType getShapedType() {
|
||||
return getType().cast<ShapedType>();
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
def EndOp : HLSCppOp<"end", [Terminator]> {
|
||||
let summary = "Mark the end of a HLSCpp region";
|
||||
let description = [{
|
||||
|
|
|
@ -31,6 +31,8 @@ bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder);
|
|||
/// fully unrolled.
|
||||
bool applyLoopPipelining(AffineForOp loop, OpBuilder &builder);
|
||||
|
||||
/// Apply array partition to the memrefs accessed inside the pipelined loops of
/// "func". A partition kind and factor are chosen for each dimension of each
/// accessed memref, and the memref types are updated in place.
/// NOTE(review): the visible implementation always returns true - confirm
/// whether callers are expected to check the result.
bool applyArrayPartition(FuncOp func, OpBuilder &builder);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Optimization Pass Entries
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -241,7 +241,7 @@ int64_t HLSCppEstimator::getPartitionIndex(Operation *op) {
|
|||
int64_t partitionIdx = 0;
|
||||
int64_t accumFactor = 1;
|
||||
|
||||
for (auto dim = 0; dim < memrefType.getRank(); ++dim) {
|
||||
for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) {
|
||||
auto idxExpr = composeMap.getResult(dim);
|
||||
|
||||
if (auto constExpr = idxExpr.dyn_cast<AffineConstantExpr>())
|
||||
|
|
|
@ -177,7 +177,7 @@ int64_t scalehls::getPartitionFactors(MemRefType memrefType,
|
|||
auto layoutMap = getLayoutMap(memrefType, memrefType.getContext());
|
||||
int64_t accumFactor = 1;
|
||||
|
||||
for (unsigned dim = 0; dim < memrefType.getRank(); ++dim) {
|
||||
for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) {
|
||||
int64_t factor = 1;
|
||||
|
||||
if (!layoutMap.isEmpty()) {
|
||||
|
|
|
@ -1397,7 +1397,7 @@ void ModuleEmitter::emitArrayPragmas(Value memref) {
|
|||
SmallVector<int64_t, 4> factors;
|
||||
getPartitionFactors(type, &factors);
|
||||
|
||||
for (unsigned dim = 0; dim < type.getRank(); ++dim) {
|
||||
for (int64_t dim = 0; dim < type.getRank(); ++dim) {
|
||||
if (factors[dim] != 1) {
|
||||
emitPragmaFlag = true;
|
||||
|
||||
|
|
|
@ -4,7 +4,9 @@
|
|||
|
||||
#include "Analysis/Utils.h"
|
||||
#include "Transforms/Passes.h"
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace mlir;
|
||||
|
@ -13,89 +15,135 @@ using namespace hlscpp;
|
|||
|
||||
namespace {
|
||||
struct ArrayPartition : public ArrayPartitionBase<ArrayPartition> {
|
||||
void runOnOperation() override;
|
||||
void runOnOperation() override {
|
||||
auto func = getOperation();
|
||||
auto builder = OpBuilder(func);
|
||||
|
||||
applyArrayPartition(func, builder);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) {
|
||||
SmallVector<mlir::AffineForOp, 4> nestedLoops;
|
||||
root.walk([&](mlir::AffineForOp loop) {
|
||||
if (auto attr = loop.getAttrOfType<BoolAttr>("pipeline")) {
|
||||
bool scalehls::applyArrayPartition(FuncOp func, OpBuilder &builder) {
|
||||
// Only memory accesses in pipelined loops will be executed in parallel.
|
||||
SmallVector<AffineForOp, 4> pipelinedLoops;
|
||||
func.walk([&](AffineForOp loop) {
|
||||
if (auto attr = loop.getAttrOfType<BoolAttr>("pipeline"))
|
||||
if (attr.getValue())
|
||||
nestedLoops.push_back(loop);
|
||||
}
|
||||
pipelinedLoops.push_back(loop);
|
||||
});
|
||||
if (nestedLoops.empty())
|
||||
return nullptr;
|
||||
else
|
||||
return nestedLoops.back();
|
||||
}
|
||||
|
||||
template <typename OpType>
|
||||
static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
|
||||
for (auto pair : map) {
|
||||
// Storing the partition information of each memref.
|
||||
using PartitionInfo = std::pair<PartitionKind, int64_t>;
|
||||
DenseMap<Value, SmallVector<PartitionInfo, 4>> partitionsMap;
|
||||
|
||||
// Traverse all pipelined loops.
|
||||
for (auto loop : pipelinedLoops) {
|
||||
MemAccessesMap accessesMap;
|
||||
getMemAccessesMap(loop.getLoopBody().front(), accessesMap);
|
||||
|
||||
for (auto pair : accessesMap) {
|
||||
auto memref = pair.first;
|
||||
auto memrefType = memref.getType().cast<MemRefType>();
|
||||
auto loadStores = pair.second;
|
||||
auto &partitions = partitionsMap[memref];
|
||||
|
||||
// If the current partitionsMap is empty, initialize it with no partition
|
||||
// and factor of 1.
|
||||
if (partitions.empty()) {
|
||||
for (int64_t dim = 0; dim < memrefType.getRank(); ++dim)
|
||||
partitions.push_back(PartitionInfo(PartitionKind::NONE, 1));
|
||||
}
|
||||
|
||||
// Find the best partition solution for each dimension of the memref.
|
||||
for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) {
|
||||
// Collect all array access indices of the current dimension.
|
||||
SmallVector<AffineExpr, 4> indices;
|
||||
for (auto accessOp : loadStores) {
|
||||
// Get memory access map.
|
||||
AffineValueMap accessMap;
|
||||
MemRefAccess(accessOp).getAccessMap(&accessMap);
|
||||
|
||||
// Get index expression.
|
||||
auto index = accessMap.getResult(dim);
|
||||
|
||||
// Only add unique index.
|
||||
if (std::find(indices.begin(), indices.end(), index) == indices.end())
|
||||
indices.push_back(index);
|
||||
}
|
||||
auto accessNum = indices.size();
|
||||
|
||||
// Find the max array access distance in the current block.
|
||||
unsigned maxDistance = 0;
|
||||
|
||||
for (unsigned i = 0; i < accessNum; ++i) {
|
||||
for (unsigned j = i + 1; j < accessNum; ++j) {
|
||||
// TODO: this expression can't be simplified in some cases.
|
||||
auto expr = indices[j] - indices[i];
|
||||
|
||||
if (auto constDistance = expr.dyn_cast<AffineConstantExpr>()) {
|
||||
unsigned distance = abs(constDistance.getValue());
|
||||
maxDistance = max(maxDistance, distance);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine array partition strategy.
|
||||
// TODO: take storage type into consideration.
|
||||
maxDistance += 1;
|
||||
if (maxDistance == 1) {
|
||||
// This means all accesses have the same index, and this dimension
|
||||
// should not be partitioned.
|
||||
continue;
|
||||
|
||||
} else if (accessNum >= maxDistance) {
|
||||
// This means some elements are accessed more than once or exactly
|
||||
// once, and successive elements are accessed. In most cases, applying
|
||||
// "cyclic" partition should be the best solution.
|
||||
unsigned factor = maxDistance;
|
||||
if (factor > partitions[dim].second)
|
||||
partitions[dim] = PartitionInfo(PartitionKind::CYCLIC, factor);
|
||||
|
||||
} else {
|
||||
// This means discrete elements are accessed. Typically, "block"
|
||||
// partition will be the most beneficial in this case.
|
||||
unsigned factor = accessNum;
|
||||
if (factor > partitions[dim].second)
|
||||
partitions[dim] = PartitionInfo(PartitionKind::BLOCK, factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Construct and set the new type of each partitioned memref.
|
||||
for (auto pair : partitionsMap) {
|
||||
auto memref = pair.first;
|
||||
auto memrefType = memref.getType().cast<MemRefType>();
|
||||
auto loadStores = pair.second;
|
||||
auto partitions = pair.second;
|
||||
|
||||
// Walk through each dimension of the targeted array.
|
||||
// Walk through each dimension of the current memory.
|
||||
SmallVector<AffineExpr, 4> partitionIndices;
|
||||
SmallVector<AffineExpr, 4> addressIndices;
|
||||
|
||||
for (unsigned dim = 0; dim < memrefType.getRank(); ++dim) {
|
||||
// Collect all array access indices of the current dimension.
|
||||
SmallVector<AffineExpr, 4> indices;
|
||||
for (auto accessOp : loadStores) {
|
||||
auto concreteOp = cast<OpType>(accessOp);
|
||||
auto index = concreteOp.getAffineMap().getResult(dim);
|
||||
// Only add unique index.
|
||||
if (std::find(indices.begin(), indices.end(), index) == indices.end())
|
||||
indices.push_back(index);
|
||||
}
|
||||
auto accessNum = indices.size();
|
||||
|
||||
// Find the max array access distance in the current block.
|
||||
unsigned maxDistance = 0;
|
||||
|
||||
for (unsigned i = 0; i < accessNum; ++i) {
|
||||
for (unsigned j = i + 1; j < accessNum; ++j) {
|
||||
// TODO: this expression can't be simplified.
|
||||
auto expr = indices[j] - indices[i];
|
||||
|
||||
if (auto constDistance = expr.dyn_cast<AffineConstantExpr>()) {
|
||||
unsigned distance = abs(constDistance.getValue());
|
||||
maxDistance = max(maxDistance, distance);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine array partition strategy.
|
||||
maxDistance += 1;
|
||||
if (maxDistance == 1) {
|
||||
// This means all accesses have the same index, and this dimension
|
||||
// should not be partitioned.
|
||||
partitionIndices.push_back(builder.getAffineConstantExpr(0));
|
||||
addressIndices.push_back(builder.getAffineDimExpr(dim));
|
||||
|
||||
} else if (accessNum >= maxDistance) {
|
||||
// This means some elements are accessed more than once or exactly
|
||||
// once, and successive elements are accessed. In most cases,
|
||||
// apply "cyclic" partition should be the best solution.
|
||||
unsigned factor = maxDistance;
|
||||
for (int64_t dim = 0; dim < memrefType.getRank(); ++dim) {
|
||||
auto partition = partitions[dim];
|
||||
auto kind = partition.first;
|
||||
auto factor = partition.second;
|
||||
|
||||
if (kind == PartitionKind::CYCLIC) {
|
||||
partitionIndices.push_back(builder.getAffineDimExpr(dim) % factor);
|
||||
addressIndices.push_back(
|
||||
builder.getAffineDimExpr(dim).floorDiv(factor));
|
||||
|
||||
} else {
|
||||
// This means discrete elements are accessed. Typically, "block"
|
||||
// partition will be the most beneficial in this case.
|
||||
unsigned factor = accessNum;
|
||||
|
||||
} else if (kind == PartitionKind::BLOCK) {
|
||||
auto blockFactor = (memrefType.getShape()[dim] + factor - 1) / factor;
|
||||
partitionIndices.push_back(
|
||||
builder.getAffineDimExpr(dim).floorDiv(blockFactor));
|
||||
addressIndices.push_back(builder.getAffineDimExpr(dim) % blockFactor);
|
||||
|
||||
} else {
|
||||
partitionIndices.push_back(builder.getAffineConstantExpr(0));
|
||||
addressIndices.push_back(builder.getAffineDimExpr(dim));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -112,41 +160,15 @@ static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
|
|||
// Set new type.
|
||||
memref.setType(newType);
|
||||
}
|
||||
}
|
||||
|
||||
void ArrayPartition::runOnOperation() {
|
||||
auto func = getOperation();
|
||||
auto builder = OpBuilder(func);
|
||||
|
||||
// Apply array partition.
|
||||
for (auto forOp : func.getOps<mlir::AffineForOp>()) {
|
||||
// TODO: support imperfect loop.
|
||||
if (auto outermost = getPipelineLoop(forOp)) {
|
||||
// Collect memory access information.
|
||||
MemAccessesMap loadMap;
|
||||
outermost.walk([&](mlir::AffineLoadOp loadOp) {
|
||||
loadMap[loadOp.getMemRef()].push_back(loadOp);
|
||||
});
|
||||
|
||||
MemAccessesMap storeMap;
|
||||
outermost.walk([&](mlir::AffineStoreOp storeOp) {
|
||||
storeMap[storeOp.getMemRef()].push_back(storeOp);
|
||||
});
|
||||
|
||||
// Apply array partition pragma.
|
||||
// TODO: how to decide which to pick?
|
||||
applyArrayPartition<mlir::AffineLoadOp>(loadMap, builder);
|
||||
applyArrayPartition<mlir::AffineStoreOp>(storeMap, builder);
|
||||
|
||||
// TODO: how to handle the case when different sub-functions have
|
||||
// different array partition strategy selected?
|
||||
}
|
||||
}
|
||||
|
||||
// Align function type with entry block argument types.
|
||||
auto resultTypes = func.front().getTerminator()->getOperandTypes();
|
||||
auto inputTypes = func.front().getArgumentTypes();
|
||||
func.setType(builder.getFunctionType(inputTypes, resultTypes));
|
||||
|
||||
// TODO: how to handle the case when different sub-functions have different
|
||||
// array partition strategy selected?
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<mlir::Pass> scalehls::createArrayPartitionPass() {
|
||||
|
|
Loading…
Reference in New Issue