//===----------------------------------------------------------------------===//
//
// Copyright 2020-2021 The ScaleHLS Authors.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "scalehls/Transforms/Passes.h"
#include "scalehls/Transforms/Utils.h"

using namespace mlir;
using namespace scalehls;
using namespace hls;

namespace {
struct CreateMemrefSubview
    : public scalehls::CreateMemrefSubviewBase<CreateMemrefSubview> {
  void runOnOperation() override;
};
} // namespace

/// This pass reduces the sizes of memrefs passed to each function: (1) conduct
/// loop analysis to determine which tile of a memref is accessed in a function,
/// (2) create a corresponding SubViewOp to load the tile out from the original
/// memref and replace all uses.
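///
/// As an illustrative sketch (the loop structure and bounds here are made
/// up, not taken from a real test case): for a DRAM memref %mem accessed as
/// %mem[%t * 8 + %i] inside a normalized point loop %i with bounds [0, 8),
/// the pass emits an affine.apply computing the offset %t * 8, creates
/// "memref.subview %mem[%t * 8] [8] [1]", and rewrites the access to index
/// the subview with %i alone.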
void CreateMemrefSubview::runOnOperation() {
  auto func = getOperation();
  auto b = OpBuilder(func);
  auto loc = b.getUnknownLoc();

  // Collect all target loop bands.
  AffineLoopBands targetBands;
  getLoopBands(func.front(), targetBands, /*allowHavingChilds=*/true);

  for (auto &band : targetBands) {
    AffineLoopBand tileBand;
    AffineLoopBand pointBand;
    if (!getTileAndPointLoopBand(band, tileBand, pointBand) ||
        pointBand.empty())
      continue;
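
    // Create the new ops right before the point band: the offsets computed
    // below never involve point loop IVs, so the new affine.apply and subview
    // ops can execute once per tile iteration, ahead of the point loops.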
    b.setInsertionPoint(pointBand.front());
    pointBand.back().walk([&](Operation *op) {
      // We only consider affine read/write for now.
      SmallVector<Value, 4> operands;
      AffineMap map;
      Value memref;
      if (auto loadOp = dyn_cast<mlir::AffineReadOpInterface>(op)) {
        operands = SmallVector<Value, 4>(loadOp.getMapOperands());
        map = loadOp.getAffineMap();
        memref = loadOp.getMemRef();
      } else if (auto storeOp = dyn_cast<mlir::AffineWriteOpInterface>(op)) {
        operands = SmallVector<Value, 4>(storeOp.getMapOperands());
        map = storeOp.getAffineMap();
        memref = storeOp.getMemRef();
      } else
        return WalkResult::advance();

      // No need to create subview for on-chip buffers. TODO: Should we make
      // this an option?
      if (memref.getType().cast<MemRefType>().getMemorySpaceAsInt() !=
          (unsigned)MemoryKind::DRAM)
        return WalkResult::advance();

      // Construct the set of dimensions whose corresponding operands are
      // point loop induction variables.
      llvm::SmallDenseSet<unsigned, 8> pointDims;
      unsigned dim = 0;
      for (auto operand : operands) {
        auto loop = getForInductionVarOwner(operand);

        // We only need to consider point loops here, and we assume all point
        // loops are normalized and have constant bounds.
        if (loop && llvm::find(pointBand, loop) != pointBand.end()) {
          if (!loop.hasConstantLowerBound() || !loop.hasConstantUpperBound() ||
              loop.getConstantLowerBound() != 0 ||
              loop.getConstantUpperBound() <= 0 || loop.getStep() != 1)
            return WalkResult::advance();
          pointDims.insert(dim);
        }
        ++dim;
      }
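
      // At this point "pointDims" holds the operand positions that are point
      // loop IVs. For example (purely illustrative), for an access map
      // (d0, d1) -> (d0 * 8 + d1) where d1 is a point loop IV, pointDims
      // becomes {1}.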

      auto numDims = map.getNumDims();
      auto numSymbols = map.getNumSymbols();
      SmallVector<AffineExpr, 4> accessExprs;
      SmallVector<OpFoldResult, 4> bufOffsets;
      SmallVector<OpFoldResult, 4> bufSizes;
      SmallVector<OpFoldResult, 4> bufStrides;

      // Traverse the memory access index of each dimension to construct the
      // sizes, offsets, and strides of the memref subview. Also, construct the
      // new memory access indices.
      for (auto expr : map.getResults()) {
        if (!expr.isPureAffine())
          return WalkResult::advance();

        // Get the flattened form of the expr, which is a sum of products in an
        // order of [dims, symbols, locals, constant].
        SimpleAffineExprFlattener flattener(numDims, numSymbols);
        flattener.walkPostOrder(expr);
        auto flattenedExpr = flattener.operandExprStack.back();
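
        // For example (illustrative): with two dims, d0 * 8 + d1 + 4 flattens
        // to the coefficient list [8, 1, 4], i.e. factor 8 for d0, factor 1
        // for d1, and a trailing constant of 4.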

        // Construct the size-expr and offset-expr. For the dims and symbols,
        // as long as an id is found in "pointDims", it is added to the
        // size-expr. Otherwise, it is added to the offset-expr.
        auto offsetExpr = b.getAffineConstantExpr(flattenedExpr.back());
        auto sizeExpr = b.getAffineConstantExpr(0);
        for (unsigned i = 0, e = numDims + numSymbols; i < e; ++i) {
          auto factor = flattenedExpr[i];
          auto id = i < numDims ? b.getAffineDimExpr(i)
                                : b.getAffineSymbolExpr(i - numDims);
          if (pointDims.count(i))
            sizeExpr = sizeExpr + id * factor;
          else
            offsetExpr = offsetExpr + id * factor;
        }

        // For local exprs, if the expr is constructed by pure point loop
        // induction variables or dynamic variables, it is added to the
        // size-expr or offset-expr, respectively. Otherwise, the size of the
        // local buffer would be dynamically shaped, which is not supported by
        // HLS and is thus skipped.
        for (unsigned i = numDims + numSymbols, e = flattenedExpr.size() - 1;
             i < e; ++i) {
          auto localExpr = flattener.localExprs[i - numDims - numSymbols];

          bool hasPointLoopVar = false;
          bool hasDynamicVar = false;
          localExpr.walk([&](AffineExpr id) {
            if (auto dim = id.dyn_cast<AffineDimExpr>()) {
              if (pointDims.count(dim.getPosition()))
                hasPointLoopVar = true;
              else
                hasDynamicVar = true;
            } else if (id.isa<AffineSymbolExpr>())
              hasDynamicVar = true;
          });

          auto factor = flattenedExpr[i];
          if (hasPointLoopVar && !hasDynamicVar)
            sizeExpr = sizeExpr + localExpr * factor;
          else if (!hasPointLoopVar && hasDynamicVar)
            offsetExpr = offsetExpr + localExpr * factor;
          else if (hasPointLoopVar && hasDynamicVar)
            return WalkResult::advance();
          else
            llvm_unreachable("unexpected local expression");
        }

        // The stride is simply the largest known divisor of the size-expr.
        auto divisor = std::max((int64_t)1, sizeExpr.getLargestKnownDivisor());
        bufStrides.push_back(b.getI64IntegerAttr(divisor));
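
        // For example (illustrative): if the size-expr is d1 * 4, the largest
        // known divisor is 4, so the subview stride is 4 and the size-expr is
        // reduced to d1 below.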

        // Now we need to determine the size of the resulting memref.
        sizeExpr = sizeExpr.floorDiv(divisor);
        accessExprs.push_back(sizeExpr);
        AffineValueMap sizeMap(AffineMap::get(numDims, numSymbols, sizeExpr),
                               operands);
        (void)sizeMap.canonicalize();

        // Take the upper bound as the size of the current dimension.
        auto bounds = getBoundOfAffineMap(sizeMap.getAffineMap(),
                                          ValueRange(sizeMap.getOperands()));
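        // Bail out if the bound cannot be computed or if the lower bound is
        // nonzero: the dimension size below is taken as upper bound + 1,
        // which is only valid when the range starts at zero.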
        if (!bounds.hasValue() || bounds.getValue().first != 0)
          return WalkResult::advance();
        bufSizes.push_back(b.getI64IntegerAttr(bounds.getValue().second + 1));

        // Now we can construct the affine apply for the offset of the current
        // memory dimension.
        AffineValueMap offsetMap(
            AffineMap::get(numDims, numSymbols, offsetExpr), operands);
        (void)offsetMap.canonicalize();
        auto offsetOp = b.create<AffineApplyOp>(loc, offsetMap.getAffineMap(),
                                                offsetMap.getOperands());
        bufOffsets.push_back(offsetOp.getResult());
      }

      // Finally, create the subview op with the constructed offsets (values
      // generated by affine apply ops), sizes, and strides. Note that this
      // subview only serves the current op; redundant subviews will be
      // canonicalized away later.
      auto subview = b.create<memref::SubViewOp>(loc, memref, bufOffsets,
                                                 bufSizes, bufStrides);
      memref.replaceUsesWithIf(subview.getResult(), [&](OpOperand &use) {
        return use.getOwner() == op;
      });

      // Update the memory access map of the current op.
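      // Both affine loads and stores keep this map in their "map" attribute,
      // so setting the attribute in place retargets the access at the
      // subview-relative indices collected in "accessExprs".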
      auto accessMap =
          AffineMap::get(numDims, numSymbols, accessExprs, map.getContext());
      op->setAttr("map", AffineMapAttr::get(accessMap));
      return WalkResult::advance();
    });
  }
}

std::unique_ptr<Pass> scalehls::createCreateMemrefSubviewPass() {
  return std::make_unique<CreateMemrefSubview>();
}