[Transforms] Refactor the applyLoopTiling API to make it more robust
This commit is contained in:
parent
10252043d4
commit
690a2d2eaa
|
@ -64,10 +64,12 @@ bool applyAffineLoopOrderOpt(AffineLoopBand &band,
|
|||
bool applyRemoveVariableBound(AffineLoopBand &band);
|
||||
|
||||
/// Apply loop tiling to the input loop band and sink all intra-tile loops to
|
||||
/// the innermost loop with the original loop order. Return the location of the
|
||||
/// innermost tile-space loop.
|
||||
Optional<unsigned> applyLoopTiling(AffineLoopBand &band, TileList tileList,
|
||||
bool simplify = true);
|
||||
/// the innermost loop with the original loop order. If "tileOrderOpt" is true,
|
||||
/// the order of all tile-space loops is optimized after tiling. If
|
||||
/// "unrollPointLoops" is true, all intra-tile loops (also called point loops)
|
||||
/// are fully unrolled after tiling.
|
||||
bool applyLoopTiling(AffineLoopBand &band, TileList tileList,
|
||||
bool tileOrderOpt = true, bool unrollPointLoops = true);
|
||||
|
||||
bool applyLegalizeToHLSCpp(FuncOp func, bool topFunc);
|
||||
|
||||
|
@ -76,22 +78,24 @@ bool applyLegalizeToHLSCpp(FuncOp func, bool topFunc);
|
|||
bool applyLoopPipelining(AffineLoopBand &band, unsigned pipelineLoc,
|
||||
unsigned targetII);
|
||||
|
||||
/// Apply simplification optimizations.
|
||||
bool applySimplificationOpts(FuncOp func);
|
||||
|
||||
/// Fully unroll all loops inside of a loop block.
|
||||
bool applyFullyLoopUnrolling(Block &block);
|
||||
|
||||
bool applyFullyUnrollAndPartition(Block &block, FuncOp func);
|
||||
|
||||
bool applyMemoryAccessOpt(FuncOp func);
|
||||
bool applyFullyLoopUnrolling(Block &block, unsigned maxIterNum = 10);
|
||||
|
||||
/// Apply the specified array partition factors and kinds.
|
||||
bool applyArrayPartition(Value array, ArrayRef<unsigned> factors,
|
||||
ArrayRef<hlscpp::PartitionKind> kinds,
|
||||
bool updateFuncSignature = true);
|
||||
|
||||
/// Find the suitable array partition factors and kinds for all arrays in the
|
||||
/// targeted function.
|
||||
bool applyAutoArrayPartition(FuncOp func);
|
||||
|
||||
/// Apply optimization strategy to a loop band. The ancestor function is
|
||||
/// also passed in because the post-tiling optimizations have to take
|
||||
/// function as target, e.g. canonicalizer and array partition.
|
||||
/// Apply optimization strategy to a loop band. The ancestor function is also
|
||||
/// passed in because the post-tiling optimizations have to take function as
|
||||
/// target, e.g. canonicalizer and array partition.
|
||||
bool applyOptStrategy(AffineLoopBand &band, FuncOp func, TileList tileList,
|
||||
unsigned targetII);
|
||||
|
||||
|
|
|
@ -171,13 +171,12 @@ static bool loopVarBoundRemoval(PyAffineLoopBand band) {
|
|||
|
||||
/// If succeeded, return the location of the innermost tile-space loop.
|
||||
/// Otherwise, return -1.
|
||||
static int64_t loopTiling(PyAffineLoopBand band, py::object factorsObject,
|
||||
bool simplify) {
|
||||
static bool loopTiling(PyAffineLoopBand band, py::object factorsObject,
|
||||
bool tileOrderOpt, bool unrollPointLoops) {
|
||||
py::gil_scoped_release();
|
||||
llvm::SmallVector<unsigned, 8> factors;
|
||||
getVectorFromUnsignedNpArray(factorsObject.ptr(), factors);
|
||||
auto loc = applyLoopTiling(band.get(), factors, simplify);
|
||||
return loc.hasValue() ? loc.getValue() : -1;
|
||||
return applyLoopTiling(band.get(), factors, tileOrderOpt, unrollPointLoops);
|
||||
}
|
||||
|
||||
static bool loopPipelining(PyAffineLoopBand band, int64_t pipelineLoc,
|
||||
|
@ -200,12 +199,12 @@ static bool legalizeToHLSCpp(MlirOperation op, bool topFunc) {
|
|||
return applyLegalizeToHLSCpp(func, topFunc);
|
||||
}
|
||||
|
||||
static bool memoryAccessOpt(MlirOperation op) {
|
||||
static bool simplificationOpts(MlirOperation op) {
|
||||
py::gil_scoped_release();
|
||||
auto func = dyn_cast<FuncOp>(unwrap(op));
|
||||
if (!func)
|
||||
throw SetPyError(PyExc_ValueError, "targeted operation not a function");
|
||||
return applyMemoryAccessOpt(func);
|
||||
return applySimplificationOpts(func);
|
||||
}
|
||||
|
||||
static bool autoArrayPartition(MlirOperation op) {
|
||||
|
@ -273,7 +272,7 @@ PYBIND11_MODULE(_scalehls, m) {
|
|||
|
||||
// Function transform APIs.
|
||||
m.def("legalize_to_hlscpp", &legalizeToHLSCpp);
|
||||
m.def("memory_access_opt", &memoryAccessOpt);
|
||||
m.def("memory_access_opt", &simplificationOpts);
|
||||
m.def("auto_array_partition", &autoArrayPartition);
|
||||
|
||||
// Array transform APIs.
|
||||
|
|
|
@ -45,6 +45,7 @@ static void updateSubFuncs(FuncOp func, Builder builder) {
|
|||
});
|
||||
}
|
||||
|
||||
/// Apply the specified array partition factors and kinds.
|
||||
bool scalehls::applyArrayPartition(Value array, ArrayRef<unsigned> factors,
|
||||
ArrayRef<hlscpp::PartitionKind> kinds,
|
||||
bool updateFuncSignature) {
|
||||
|
@ -203,6 +204,8 @@ getDimAccessMaps(Operation *op, AffineValueMap valueMap, int64_t dim) {
|
|||
return maps;
|
||||
}
|
||||
|
||||
/// Find the suitable array partition factors and kinds for all arrays in the
|
||||
/// targeted function.
|
||||
bool scalehls::applyAutoArrayPartition(FuncOp func) {
|
||||
// Check whether the input function is pipelined.
|
||||
bool funcPipeline = false;
|
||||
|
|
|
@ -5,10 +5,8 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
|
||||
#include "mlir/Dialect/Affine/Analysis/Utils.h"
|
||||
#include "mlir/Dialect/Affine/LoopUtils.h"
|
||||
#include "mlir/Dialect/Affine/Utils.h"
|
||||
#include "mlir/IR/IntegerSet.h"
|
||||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
||||
#include "scalehls/Transforms/Passes.h"
|
||||
#include "scalehls/Transforms/Utils.h"
|
||||
|
@ -16,109 +14,42 @@
|
|||
using namespace mlir;
|
||||
using namespace scalehls;
|
||||
|
||||
static IntegerSet simplify(IntegerSet set) { return simplifyIntegerSet(set); }
|
||||
|
||||
/// Performs basic affine map simplifications.
|
||||
static AffineMap simplify(AffineMap map) {
|
||||
MutableAffineMap mMap(map);
|
||||
mMap.simplify();
|
||||
return mMap.getAffineMap();
|
||||
}
|
||||
|
||||
/// Utility to simplify an affine attribute and update its entry in the parent
|
||||
/// operation if necessary.
|
||||
template <typename AttrT>
|
||||
static void
|
||||
simplifyAndUpdateAttr(Operation *op, StringAttr name, AttrT attr,
|
||||
DenseMap<Attribute, Attribute> &simplifiedAttrs) {
|
||||
auto &simplified = simplifiedAttrs[attr];
|
||||
if (simplified == attr)
|
||||
return;
|
||||
|
||||
// This is a newly encountered attribute.
|
||||
if (!simplified) {
|
||||
// Try to simplify the value of the attribute.
|
||||
auto value = attr.getValue();
|
||||
auto simplifiedValue = simplify(value);
|
||||
if (simplifiedValue == value) {
|
||||
simplified = attr;
|
||||
return;
|
||||
}
|
||||
simplified = AttrT::get(simplifiedValue);
|
||||
}
|
||||
|
||||
// Simplification was successful, so update the attribute.
|
||||
op->setAttr(name, simplified);
|
||||
}
|
||||
|
||||
static void simplifyAffineStructures(Block &block) {
|
||||
auto context = block.front().getContext();
|
||||
DenseMap<Attribute, Attribute> simplifiedAttrs;
|
||||
|
||||
RewritePatternSet patterns(context);
|
||||
AffineApplyOp::getCanonicalizationPatterns(patterns, context);
|
||||
AffineForOp::getCanonicalizationPatterns(patterns, context);
|
||||
AffineIfOp::getCanonicalizationPatterns(patterns, context);
|
||||
FrozenRewritePatternSet frozenPatterns(std::move(patterns));
|
||||
|
||||
// The simplification of affine attributes will likely simplify the op. Try to
|
||||
// fold/apply canonicalization patterns when we have affine dialect ops.
|
||||
SmallVector<Operation *> opsToSimplify;
|
||||
block.walk([&](Operation *op) {
|
||||
for (auto attr : op->getAttrs()) {
|
||||
if (auto mapAttr = attr.getValue().dyn_cast<AffineMapAttr>())
|
||||
simplifyAndUpdateAttr(op, attr.getName(), mapAttr, simplifiedAttrs);
|
||||
else if (auto setAttr = attr.getValue().dyn_cast<IntegerSetAttr>())
|
||||
simplifyAndUpdateAttr(op, attr.getName(), setAttr, simplifiedAttrs);
|
||||
}
|
||||
|
||||
if (isa<AffineForOp, AffineIfOp, AffineApplyOp>(op))
|
||||
opsToSimplify.push_back(op);
|
||||
});
|
||||
applyOpPatternsAndFold(opsToSimplify, frozenPatterns, /*strict=*/true);
|
||||
}
|
||||
|
||||
/// Apply loop tiling to the input loop band and sink all intra-tile loops to
|
||||
/// the innermost loop with the original loop order. Return the location of the
|
||||
/// innermost tile-space loop.
|
||||
Optional<unsigned> scalehls::applyLoopTiling(AffineLoopBand &band,
|
||||
TileList tileList, bool simplify) {
|
||||
/// the innermost loop with the original loop order. If "tileOrderOpt" is true,
|
||||
/// the order of all tile-space loops is optimized after tiling. If
|
||||
/// "unrollPointLoops" is true, all intra-tile loops (also called point loops)
|
||||
/// are fully unrolled after tiling.
|
||||
bool scalehls::applyLoopTiling(AffineLoopBand &band, TileList tileList,
|
||||
bool tileOrderOpt, bool unrollPointLoops) {
|
||||
assert(!band.empty() && "no loops provided");
|
||||
|
||||
if (!isPerfectlyNested(band))
|
||||
return Optional<unsigned>();
|
||||
return false;
|
||||
|
||||
// Loop tiling.
|
||||
// Record the original band size and attributes to make use of later.
|
||||
auto originalBandSize = band.size();
|
||||
SmallVector<LoopDirectiveAttr, 6> bandAttrs;
|
||||
for (auto loop : band)
|
||||
bandAttrs.push_back(getLoopDirective(loop));
|
||||
|
||||
// Apply loop tiling.
|
||||
AffineLoopBand tiledBand;
|
||||
if (failed(tilePerfectlyNested(band, tileList, &tiledBand)))
|
||||
return Optional<unsigned>();
|
||||
return false;
|
||||
|
||||
// Simplify the tiled loop band if required.
|
||||
if (simplify) {
|
||||
band.clear();
|
||||
unsigned simplifiedBandSize = 0;
|
||||
for (unsigned i = 0, e = tiledBand.size(); i < e; ++i) {
|
||||
auto loop = tiledBand[i];
|
||||
|
||||
Optional<uint64_t> tripCount = getConstantTripCount(loop);
|
||||
if (i < originalBandSize - 1 || simplifiedBandSize > 0 || !tripCount ||
|
||||
tripCount.getValue() != 1)
|
||||
(void)normalizeAffineFor(loop);
|
||||
|
||||
if (loop && !loop.getLoopBody().empty()) {
|
||||
band.push_back(loop);
|
||||
if (i < originalBandSize)
|
||||
++simplifiedBandSize;
|
||||
}
|
||||
}
|
||||
simplifyAffineStructures(*band.front().getBody());
|
||||
return simplifiedBandSize - 1;
|
||||
}
|
||||
|
||||
// Otherwise, directly return the tiled loop band.
|
||||
// Get all tile-space loops and reannotate the attributes.
|
||||
band = tiledBand;
|
||||
return originalBandSize - 1;
|
||||
band.resize(originalBandSize);
|
||||
for (auto zip : llvm::zip(band, bandAttrs))
|
||||
if (std::get<1>(zip))
|
||||
setLoopDirective(std::get<0>(zip), std::get<1>(zip));
|
||||
|
||||
// Apply loop order optimization and point loops unrolling if required.
|
||||
if (tileOrderOpt)
|
||||
applyAffineLoopOrderOpt(band);
|
||||
if (unrollPointLoops)
|
||||
applyFullyLoopUnrolling(*band.back().getBody());
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -159,13 +90,9 @@ struct AffineLoopUnrollAndPipeline
|
|||
sizes.push_back(1);
|
||||
}
|
||||
|
||||
// Apply loop tiling and extract the tile loops if applicable.
|
||||
if (auto tileLoc = applyLoopTiling(band, sizes))
|
||||
band.resize(tileLoc.getValue() + 1);
|
||||
|
||||
// Apply loop order optimization and pipelining.
|
||||
if (loopOrderOpt)
|
||||
applyAffineLoopOrderOpt(band);
|
||||
// Apply loop unrolling and pipelining.
|
||||
applyLoopTiling(band, sizes, /*tileOrderOpt=*/loopOrderOpt.getValue(),
|
||||
/*unrollPointLoops=*/true);
|
||||
applyLoopPipelining(band, band.size() - 1, (unsigned)1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -676,7 +676,9 @@ bool ScaleHLSOptimizer::simplifyLoopNests(FuncOp func) {
|
|||
// unrolling to it.
|
||||
tmpFunc.walk([&](AffineForOp loop) {
|
||||
if (loop->getAttrOfType<BoolAttr>("opt_flag")) {
|
||||
applyFullyUnrollAndPartition(*loop.getBody(), tmpFunc);
|
||||
applyFullyLoopUnrolling(*loop.getBody());
|
||||
applySimplificationOpts(tmpFunc);
|
||||
applyAutoArrayPartition(tmpFunc);
|
||||
return;
|
||||
}
|
||||
});
|
||||
|
@ -685,9 +687,11 @@ bool ScaleHLSOptimizer::simplifyLoopNests(FuncOp func) {
|
|||
estimator.estimateFunc(tmpFunc);
|
||||
|
||||
// Fully unroll the candidate loop or delve into child loops.
|
||||
if (getResource(tmpFunc).getDsp() <= maxDspNum)
|
||||
applyFullyUnrollAndPartition(*candidate.getBody(), func);
|
||||
else {
|
||||
if (getResource(tmpFunc).getDsp() <= maxDspNum) {
|
||||
applyFullyLoopUnrolling(*candidate.getBody());
|
||||
applySimplificationOpts(func);
|
||||
applyAutoArrayPartition(func);
|
||||
} else {
|
||||
auto childForOps = candidate.getOps<AffineForOp>();
|
||||
targetLoops.append(childForOps.begin(), childForOps.end());
|
||||
}
|
||||
|
|
|
@ -83,7 +83,9 @@ void scalehls::registerScaleHLSPyTorchPipeline() {
|
|||
scalehls::createAffineLoopUnrollAndPipelinePass(loopUnrollSize));
|
||||
}
|
||||
|
||||
// Memory accessing simplifications.
|
||||
// Apply simplifications.
|
||||
pm.addPass(mlir::createAffineLoopNormalizePass());
|
||||
pm.addPass(mlir::createSimplifyAffineStructuresPass());
|
||||
pm.addPass(mlir::createCanonicalizerPass());
|
||||
pm.addPass(scalehls::createSimplifyAffineIfPass());
|
||||
pm.addPass(scalehls::createAffineStoreForwardPass());
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "scalehls/Transforms/Utils.h"
|
||||
#include "mlir/Dialect/Affine/LoopUtils.h"
|
||||
#include "mlir/Dialect/Affine/Passes.h"
|
||||
#include "mlir/Pass/PassManager.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
#include "scalehls/Transforms/Passes.h"
|
||||
|
@ -84,10 +85,36 @@ void scalehls::setFuncDirective(Operation *op, bool pipeline,
|
|||
// Loop transform utils
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Append the simplification pass pipeline to "pm". The passes are added in
/// three groups, in the order shown below: affine simplifications, memory
/// access simplifications, and generic cleanup. This helper only populates the
/// pass manager; it does not run it.
static void addSimplificationPipeline(PassManager &pm) {
|
||||
// To factor out the redundant affine operations.
|
||||
pm.addPass(createAffineLoopNormalizePass());
|
||||
pm.addPass(createSimplifyAffineStructuresPass());
|
||||
pm.addPass(createCanonicalizerPass());
|
||||
pm.addPass(createSimplifyAffineIfPass());
|
||||
|
||||
// To simplify the memory accessing. Note that the store forwarding is
|
||||
// non-trivial and has a worst case complexity of O(n^2).
|
||||
pm.addPass(createAffineStoreForwardPass());
|
||||
pm.addPass(createSimplifyMemrefAccessPass());
|
||||
|
||||
// Generic common sub expression elimination.
|
||||
pm.addPass(createCSEPass());
// NOTE(review): this pass is not CSE; it is simply the last pass appended to
// the pipeline. Presumably it adjusts pipeline initiation intervals -- confirm
// against the pass definition.
|
||||
pm.addPass(createReduceInitialIntervalPass());
|
||||
}
|
||||
|
||||
/// Apply simplification optimizations to "func". A pass manager anchored on
/// "builtin.func" is populated through addSimplificationPipeline() and run on
/// the function. Return false if running the pipeline fails, true otherwise.
|
||||
bool scalehls::applySimplificationOpts(FuncOp func) {
|
||||
// Apply general optimizations.
|
||||
PassManager optPM(func.getContext(), "builtin.func");
|
||||
addSimplificationPipeline(optPM);
|
||||
// Report a failed pipeline run to the caller instead of asserting.
if (failed(optPM.run(func)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Fully unroll all loops inside of a block.
|
||||
bool scalehls::applyFullyLoopUnrolling(Block &block) {
|
||||
// Try 8 iterations before exiting.
|
||||
for (auto i = 0; i < 8; ++i) {
|
||||
bool scalehls::applyFullyLoopUnrolling(Block &block, unsigned maxIterNum) {
|
||||
for (unsigned i = 0; i < maxIterNum; ++i) {
|
||||
bool hasFullyUnrolled = true;
|
||||
block.walk([&](AffineForOp loop) {
|
||||
if (failed(loopUnrollFull(loop)))
|
||||
|
@ -103,46 +130,6 @@ bool scalehls::applyFullyLoopUnrolling(Block &block) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static void addPassPipeline(PassManager &pm) {
|
||||
// To factor out the redundant AffineApply/AffineIf operations.
|
||||
pm.addPass(createCanonicalizerPass());
|
||||
pm.addPass(createSimplifyAffineIfPass());
|
||||
|
||||
// To simplify the memory accessing. Note that the store forwarding is
|
||||
// non-trivial and has a worst case complexity of O(n^2).
|
||||
pm.addPass(createAffineStoreForwardPass());
|
||||
pm.addPass(createSimplifyMemrefAccessPass());
|
||||
|
||||
// Generic common sub expression elimination.
|
||||
pm.addPass(createCSEPass());
|
||||
pm.addPass(createReduceInitialIntervalPass());
|
||||
}
|
||||
|
||||
bool scalehls::applyMemoryAccessOpt(FuncOp func) {
|
||||
// Apply general optimizations.
|
||||
PassManager optPM(func.getContext(), "builtin.func");
|
||||
addPassPipeline(optPM);
|
||||
if (failed(optPM.run(func)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool scalehls::applyFullyUnrollAndPartition(Block &block, FuncOp func) {
|
||||
applyFullyLoopUnrolling(block);
|
||||
|
||||
// Apply general optimizations.
|
||||
PassManager optPM(func.getContext(), "builtin.func");
|
||||
addPassPipeline(optPM);
|
||||
if (failed(optPM.run(func)))
|
||||
return false;
|
||||
|
||||
// Apply the best suitable array partition strategy to the function.
|
||||
applyAutoArrayPartition(func);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Apply optimization strategy to a loop band. The ancestor function is also
|
||||
/// passed in because the post-tiling optimizations have to take function as
|
||||
/// target, e.g. canonicalizer and array partition.
|
||||
|
@ -152,27 +139,22 @@ bool scalehls::applyOptStrategy(AffineLoopBand &band, FuncOp func,
|
|||
if (!func->isProperAncestor(band.front()))
|
||||
return false;
|
||||
|
||||
// Apply loop tiling.
|
||||
auto pipelineLoopLoc = applyLoopTiling(band, tileList);
|
||||
if (!pipelineLoopLoc)
|
||||
return false;
|
||||
|
||||
// Apply LegalizeToHLSCpp conversion.
|
||||
applyLegalizeToHLSCpp(func, /*isTopFunc=*/true);
|
||||
|
||||
// Apply loop tiling.
|
||||
if (!applyLoopTiling(band, tileList, /*tileOrderOpt=*/false,
|
||||
/*unrollPointLoops=*/true))
|
||||
return false;
|
||||
|
||||
// Apply loop pipelining.
|
||||
if (!applyLoopPipelining(band, pipelineLoopLoc.getValue(), targetII))
|
||||
if (!applyLoopPipelining(band, band.size() - 1, targetII))
|
||||
return false;
|
||||
|
||||
// Apply generic optimizations.
|
||||
PassManager optPM(func.getContext(), "builtin.func");
|
||||
addPassPipeline(optPM);
|
||||
if (failed(optPM.run(func)))
|
||||
return false;
|
||||
|
||||
// Apply the best suitable array partition strategy to the function.
|
||||
// Apply memory access optimizations and the best suitable array partition
|
||||
// strategy to the function.
|
||||
applySimplificationOpts(func);
|
||||
applyAutoArrayPartition(func);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -181,32 +163,24 @@ bool scalehls::applyOptStrategy(FuncOp func, ArrayRef<TileList> tileLists,
|
|||
ArrayRef<unsigned> targetIIs) {
|
||||
AffineLoopBands bands;
|
||||
getLoopBands(func.front(), bands);
|
||||
|
||||
// Apply loop tiling and pipelining to all loop bands.
|
||||
SmallVector<unsigned, 4> pipelineLoopLocs;
|
||||
for (unsigned i = 0, e = bands.size(); i < e; ++i) {
|
||||
auto pipelineLoopLoc = applyLoopTiling(bands[i], tileLists[i]);
|
||||
if (!pipelineLoopLoc)
|
||||
return false;
|
||||
pipelineLoopLocs.push_back(pipelineLoopLoc.getValue());
|
||||
}
|
||||
assert(bands.size() == tileLists.size() && bands.size() == targetIIs.size() &&
|
||||
"unexpected size of tile lists or target IIs");
|
||||
|
||||
// Apply LegalizeToHLSCpp conversion.
|
||||
applyLegalizeToHLSCpp(func, /*isTopFunc=*/true);
|
||||
|
||||
for (unsigned i = 0, e = bands.size(); i < e; ++i) {
|
||||
if (!applyLoopPipelining(bands[i], pipelineLoopLocs[i], targetIIs[i]))
|
||||
// Apply loop tiling to all loop bands.
|
||||
for (unsigned i = 0, e = bands.size(); i < e; ++i)
|
||||
if (!applyLoopTiling(bands[i], tileLists[i]))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Apply generic optimizations.
|
||||
PassManager optPM(func.getContext(), "builtin.func");
|
||||
addPassPipeline(optPM);
|
||||
if (failed(optPM.run(func)))
|
||||
return false;
|
||||
for (unsigned i = 0, e = bands.size(); i < e; ++i)
|
||||
if (!applyLoopPipelining(bands[i], bands[i].size() - 1, targetIIs[i]))
|
||||
return false;
|
||||
|
||||
// Apply the best suitable array partition strategy to the function.
|
||||
// Apply memory access optimizations and the best suitable array partition
|
||||
// strategy to the function.
|
||||
applySimplificationOpts(func);
|
||||
applyAutoArrayPartition(func);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,22 +1,19 @@
|
|||
// RUN: scalehls-opt -affine-loop-unroll-and-pipeline="unroll-size=2 loop-order-opt=false" %s | FileCheck %s
|
||||
|
||||
// CHECK: #map0 = affine_map<(d0, d1) -> (d0 + d1 * 2)>
|
||||
// CHECK: #map1 = affine_map<(d0) -> (d0 + 1)>
|
||||
// CHECK: #map = affine_map<(d0) -> (d0 + 1)>
|
||||
// CHECK: #set0 = affine_set<(d0, d1) : (d0 - d1 >= 0)>
|
||||
// CHECK: #set1 = affine_set<(d0) : (d0 == 0)>
|
||||
#set0 = affine_set<(d0, d1) : (d0 - d1 >= 0)>
|
||||
#set1 = affine_set<(d0) : (d0 == 0)>
|
||||
module {
|
||||
func @test_syrk(%arg0: f32, %arg1: f32, %arg2: memref<16x16xf32>, %arg3: memref<16x16xf32>) {
|
||||
// CHECK: affine.for %arg4 = 0 to 8 {
|
||||
// CHECK: affine.for %arg4 = 0 to 16 step 2 {
|
||||
// CHECK: affine.for %arg5 = 0 to 16 {
|
||||
// CHECK: affine.for %arg6 = 0 to 16 {
|
||||
// CHECK-NOT: affine.for %arg7 = 0 to 2 {
|
||||
affine.for %arg4 = 0 to 16 {
|
||||
affine.for %arg5 = 0 to 16 {
|
||||
affine.for %arg6 = 0 to 16 {
|
||||
// CHECK: %0 = affine.apply #map0(%c0, %arg4)
|
||||
// CHECK: affine.if #set0(%arg5, %arg6) {
|
||||
affine.if #set0(%arg5, %arg6) {
|
||||
%0 = affine.load %arg3[%arg5, %arg6] : memref<16x16xf32>
|
||||
%1 = arith.mulf %arg1, %0 : f32
|
||||
|
@ -31,9 +28,7 @@ module {
|
|||
%7 = arith.addf %6, %4 : f32
|
||||
affine.store %7, %arg3[%arg5, %arg6] : memref<16x16xf32>
|
||||
}
|
||||
// CHECK: %1 = affine.apply #map1(%c0)
|
||||
// CHECK: %2 = affine.apply #map0(%1, %arg4)
|
||||
// CHECK: affine.if #set0(%arg5, %arg6) {
|
||||
// CHECK: %0 = affine.apply #map(%arg4)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,10 +71,11 @@ def main():
|
|||
# Note: We use the trip count to generate this example "factors".
|
||||
factors = np.ones(band.depth, dtype=int)
|
||||
factors[-1] = band.get_trip_count(band.depth - 1) / 4
|
||||
loc = scalehls.loop_tiling(band, factors, True) # simplify = True
|
||||
# tileOrderOpt = False, unrollPointLoops = True
|
||||
scalehls.loop_tiling(band, factors, False, True)
|
||||
|
||||
# Apply loop pipelining. All loops inside of the pipelined loop are fully unrolled.
|
||||
scalehls.loop_pipelining(band, loc, 3) # targetII = 3
|
||||
scalehls.loop_pipelining(band, band.depth - 1, 3) # targetII = 3
|
||||
|
||||
# Traverse all arrays in the function.
|
||||
arrays = scalehls.ArrayList(func)
|
||||
|
@ -93,7 +94,7 @@ def main():
|
|||
scalehls.legalize_to_hlscpp(
|
||||
func, func.sym_name.value == opts.function)
|
||||
|
||||
# Optimize memory accesses through store forwarding, etc.
|
||||
# Apply simplifications.
|
||||
scalehls.memory_access_opt(func)
|
||||
|
||||
# Apply suitable array partition strategies through analyzing the array access pattern.
|
||||
|
|
Loading…
Reference in New Issue