[QoREstimation] estimate based on memref type rather than ArrayOp attributes

This commit is contained in:
Hanchen Ye 2021-01-03 17:48:36 -06:00
parent ff6e7f0c4c
commit f052331cbc
4 changed files with 65 additions and 97 deletions

View File

@ -87,6 +87,9 @@ using MemAccessesMap = DenseMap<Value, MemAccesses>;
void getMemAccessesMap(Block &block, MemAccessesMap &map,
bool includeCalls = false);
Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context);
// Check whether lhsOp and rhsOp are at the same scheduling level. In this check,
// AffineIfOp is transparent.
Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
@ -96,16 +99,10 @@ Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,
// level with dstOp's any parent loop.
Operation *getSameLevelDstOp(Operation *srcOp, Operation *dstOp);
/// Get the definition ArrayOp given any memref or memory access operation.
hlscpp::ArrayOp getArrayOp(Value memref);
AffineMap getLayoutMap(MemRefType memrefType);
hlscpp::ArrayOp getArrayOp(Operation *op);
Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context);
void getPartitionFactors(ArrayRef<int64_t> shape, AffineMap layoutMap,
SmallVector<int64_t, 4> &factors);
int64_t getPartitionFactors(MemRefType memrefType,
SmallVector<int64_t, 4> *factors = nullptr);
} // namespace scalehls
} // namespace mlir

View File

@ -41,16 +41,17 @@ public:
// Indicate the unoccupied memory ports number.
struct PortInfo {
PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
: rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
unsigned rdPort;
unsigned wrPort;
unsigned rdwrPort;
PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
: rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
};
// For storing ports number of all partitions indexed by the array (ArrayOp).
// For storing ports number of all partitions indexed by the memref.
using Ports = SmallVector<PortInfo, 16>;
using PortsMap = DenseMap<Operation *, Ports>;
using PortsMap = DenseMap<Value, Ports>;
// For storing PortsMap indexed by the scheduling level.
using PortsMapDict = DenseMap<int64_t, PortsMap>;
@ -130,11 +131,10 @@ public:
/// Collect all dependencies detected in the function.
void HLSCppEstimator::getFuncDependencies() {
// TODO: This can be simplified by traversing each ArrayOp in the function.
MemAccessesMap map;
getMemAccessesMap(func.front(), map, /*includeCalls=*/true);
// Walk through all ArrayOp - LoadOp/StoreOp pairs, and find all memory
// Walk through all MemRef - LoadOp/StoreOp pairs, and find all memory
// related dependencies.
for (auto &pair : map) {
auto memAccesses = pair.second;
@ -226,18 +226,15 @@ int64_t HLSCppEstimator::getPartitionIndex(Operation *op) {
dimReplacements, symReplacements, accessMap.getNumDims(),
accessMap.getNumSymbols());
// Check whether the memref is partitioned.
auto memrefMaps = memrefType.getAffineMaps();
if (memrefMaps.empty())
return 0;
// Compose the access map with the layout map.
auto layoutMap = memrefMaps.back();
auto layoutMap = getLayoutMap(memrefType);
if (layoutMap.isEmpty())
return 0;
auto composeMap = layoutMap.compose(newMap);
// Collect partition factors.
SmallVector<int64_t, 4> factors;
getPartitionFactors(memrefType.getShape(), layoutMap, factors);
getPartitionFactors(memrefType, &factors);
// Calculate the partition index of this load/store operation honoring the
// partition strategy applied.
@ -266,15 +263,18 @@ void HLSCppEstimator::estimateLoadStore(Operation *op, int64_t begin) {
auto partitionIdx = getPartitionIndex(op);
setAttrValue(op, "partition_index", partitionIdx);
auto arrayOp = getArrayOp(op);
auto partitionNum = arrayOp.partition_num();
auto storageType = arrayOp.storage_type();
auto access = MemRefAccess(op);
auto memref = access.memref;
auto memrefType = memref.getType().cast<MemRefType>();
auto partitionNum = getPartitionFactors(memrefType);
std::string storageType = "ram_1p_bram";
// Try to avoid memory port violation until a legal schedule is found. Since
// an infinite-length schedule cannot be generated, this while loop can be
// proven to terminate.
while (true) {
auto memPort = portsMapDict[begin][arrayOp];
auto memPort = portsMapDict[begin][memref];
bool memPortEmpty = memPort.empty();
// If the memory has not been occupied by the current schedule level, it
@ -337,7 +337,7 @@ void HLSCppEstimator::estimateLoadStore(Operation *op, int64_t begin) {
// If successful, break the while loop. Otherwise, increase the schedule
// level by 1 and continue trying.
if (successFlag) {
portsMapDict[begin][arrayOp] = memPort;
portsMapDict[begin][memref] = memPort;
break;
} else
begin++;
@ -372,10 +372,9 @@ int64_t HLSCppEstimator::getResMinII(MemAccessesMap &map) {
int64_t II = 1;
for (auto &pair : map) {
auto arrayOp = getArrayOp(pair.first);
// Partition number should at least be 1.
auto partitionNum = arrayOp.partition_num();
auto storageType = arrayOp.storage_type();
auto memrefType = pair.first.getType().cast<MemRefType>();
auto partitionNum = getPartitionFactors(memrefType);
std::string storageType = "ram_1p_bram";
SmallVector<int64_t, 16> readNum;
SmallVector<int64_t, 16> writeNum;

View File

@ -160,41 +160,40 @@ Operation *scalehls::getSameLevelDstOp(Operation *srcOp, Operation *dstOp) {
return nullptr;
}
/// Get the definition ArrayOp given any memref or memory access operation.
hlscpp::ArrayOp scalehls::getArrayOp(Value memref) {
assert(memref.getType().isa<MemRefType>() && "isn't a MemRef type value");
AffineMap scalehls::getLayoutMap(MemRefType memrefType) {
// Check whether the memref has layout map.
auto memrefMaps = memrefType.getAffineMaps();
if (memrefMaps.empty())
return AffineMap();
auto defOp = memref.getDefiningOp();
assert(defOp && "MemRef is block argument");
auto arrayOp = dyn_cast<hlscpp::ArrayOp>(defOp);
assert(arrayOp && "MemRef is not defined by ArrayOp");
return arrayOp;
return memrefMaps.back();
}
hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
return getArrayOp(MemRefAccess(op).memref);
}
int64_t scalehls::getPartitionFactors(MemRefType memrefType,
SmallVector<int64_t, 4> *factors) {
auto shape = memrefType.getShape();
auto layoutMap = getLayoutMap(memrefType);
int64_t accumFactor = 1;
void scalehls::getPartitionFactors(ArrayRef<int64_t> shape, AffineMap layoutMap,
SmallVector<int64_t, 4> &factors) {
for (unsigned dim = 0, e = shape.size(); dim < e; ++dim) {
for (unsigned dim = 0; dim < memrefType.getRank(); ++dim) {
int64_t factor = 1;
if (!layoutMap.isEmpty()) {
auto expr = layoutMap.getResult(dim);
if (auto binaryExpr = expr.dyn_cast<AffineBinaryOpExpr>()) {
if (auto factor = binaryExpr.getRHS().dyn_cast<AffineConstantExpr>()) {
if (auto binaryExpr = expr.dyn_cast<AffineBinaryOpExpr>())
if (auto rhsExpr = binaryExpr.getRHS().dyn_cast<AffineConstantExpr>()) {
if (expr.getKind() == AffineExprKind::Mod)
factors.push_back(factor.getValue());
else if (expr.getKind() == AffineExprKind::FloorDiv) {
auto blockFactor =
(shape[dim] + factor.getValue() - 1) / factor.getValue();
factors.push_back(blockFactor);
factor = rhsExpr.getValue();
else if (expr.getKind() == AffineExprKind::FloorDiv)
factor = (shape[dim] + rhsExpr.getValue() - 1) / rhsExpr.getValue();
}
}
} else if (auto constExpr = expr.dyn_cast<AffineConstantExpr>()) {
if (constExpr.getValue() == 0)
factors.push_back(1);
}
accumFactor *= factor;
if (factors != nullptr)
factors->push_back(factor);
}
return accumFactor;
}

View File

@ -34,26 +34,18 @@ static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) {
template <typename OpType>
static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
for (auto pair : map) {
auto arrayOp = getArrayOp(pair.first);
auto arrayShape = arrayOp.getShapedType().getShape();
auto arrayAccesses = pair.second;
auto memref = pair.first;
auto memrefType = memref.getType().cast<MemRefType>();
auto loadStores = pair.second;
// Walk through each dimension of the targeted array.
SmallVector<int64_t, 4> partitionFactor;
SmallVector<StringRef, 4> partitionType;
SmallVector<AffineExpr, 4> partitionIndices;
SmallVector<AffineExpr, 4> addressIndices;
unsigned partitionNum = 1;
for (size_t dim = 0, e = arrayShape.size(); dim < e; ++dim) {
for (unsigned dim = 0; dim < memrefType.getRank(); ++dim) {
// Collect all array access indices of the current dimension.
SmallVector<AffineExpr, 4> indices;
for (auto accessOp : arrayAccesses) {
for (auto accessOp : loadStores) {
auto concreteOp = cast<OpType>(accessOp);
auto index = concreteOp.getAffineMap().getResult(dim);
// Only add unique index.
@ -64,7 +56,6 @@ static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
// Find the max array access distance in the current block.
unsigned maxDistance = 0;
bool failFlag = false;
for (unsigned i = 0; i < accessNum; ++i) {
for (unsigned j = i + 1; j < accessNum; ++j) {
@ -74,23 +65,15 @@ static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
if (auto constDistance = expr.dyn_cast<AffineConstantExpr>()) {
unsigned distance = abs(constDistance.getValue());
maxDistance = max(maxDistance, distance);
} else {
// failFlag = true;
// break;
}
}
// if (failFlag)
// break;
}
// Determine array partition strategy.
maxDistance += 1;
unsigned factor = 1;
if (failFlag || maxDistance == 1) {
if (maxDistance == 1) {
// This means all accesses have the same index, and this dimension
// should not be partitioned.
partitionType.push_back("none");
partitionIndices.push_back(builder.getAffineConstantExpr(0));
addressIndices.push_back(builder.getAffineDimExpr(dim));
@ -98,8 +81,7 @@ static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
// This means some elements are accessed more than once or exactly
// once, and successive elements are accessed. In most cases,
// apply "cyclic" partition should be the best solution.
partitionType.push_back("cyclic");
factor = maxDistance;
unsigned factor = maxDistance;
partitionIndices.push_back(builder.getAffineDimExpr(dim) % factor);
addressIndices.push_back(
@ -108,17 +90,13 @@ static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
} else {
// This means discrete elements are accessed. Typically, "block"
// partition will be most benefit for this occasion.
partitionType.push_back("block");
factor = accessNum;
unsigned factor = accessNum;
auto blockFactor = (memrefType.getShape()[dim] + factor - 1) / factor;
partitionIndices.push_back(
builder.getAffineDimExpr(dim).floorDiv(blockFactor));
addressIndices.push_back(builder.getAffineDimExpr(dim) % blockFactor);
}
partitionFactor.push_back(factor);
partitionNum *= factor;
}
// Construct new layout map.
@ -127,18 +105,13 @@ static void applyArrayPartition(MemAccessesMap &map, OpBuilder &builder) {
builder.getContext());
// Construct new memref type.
auto newType = MemRefType::get(memrefType.getShape(),
memrefType.getElementType(), layoutMap);
auto newType =
MemRefType::get(memrefType.getShape(), memrefType.getElementType(),
layoutMap, memrefType.getMemorySpace());
// Set new type.
memref.setType(newType);
// TODO: set function type.
arrayOp.setAttr("partition", builder.getBoolAttr(true));
arrayOp.setAttr("partition_type", builder.getStrArrayAttr(partitionType));
arrayOp.setAttr("partition_factor",
builder.getI64ArrayAttr(partitionFactor));
arrayOp.setAttr("partition_num", builder.getI64IntegerAttr(partitionNum));
}
}