From 690a2d2eaa912819154c3030b8dd73af0e78c6f9 Mon Sep 17 00:00:00 2001
From: Hanchen Ye
Date: Mon, 28 Feb 2022 20:05:20 -0600
Subject: [PATCH] [Transforms] Refactor the applyLoopTiling API to make it
 more robust

---
 include/scalehls/Transforms/Utils.h           |  28 ++--
 lib/Bindings/Python/ScaleHLSModule.cpp        |  13 +-
 lib/Transforms/Directive/ArrayPartition.cpp   |   3 +
 .../Loop/AffineLoopUnrollAndPipeline.cpp      | 131 ++++--------------
 lib/Transforms/MultipleLevelDSE.cpp           |  12 +-
 lib/Transforms/Passes.cpp                     |   4 +-
 lib/Transforms/Utils.cpp                      | 126 +++++++----------
 .../Loop/affine_loop_unroll_and_pipeline.mlir |  11 +-
 tools/pyscalehls/pyscalehls.py                |   7 +-
 9 files changed, 122 insertions(+), 213 deletions(-)

diff --git a/include/scalehls/Transforms/Utils.h b/include/scalehls/Transforms/Utils.h
index 4899f38..86f5598 100644
--- a/include/scalehls/Transforms/Utils.h
+++ b/include/scalehls/Transforms/Utils.h
@@ -64,10 +64,12 @@ bool applyAffineLoopOrderOpt(AffineLoopBand &band,
 bool applyRemoveVariableBound(AffineLoopBand &band);
 
 /// Apply loop tiling to the input loop band and sink all intra-tile loops to
-/// the innermost loop with the original loop order. Return the location of the
-/// innermost tile-space loop.
-Optional<unsigned> applyLoopTiling(AffineLoopBand &band, TileList tileList,
-                                   bool simplify = true);
+/// the innermost loop with the original loop order. If "tileOrderOpt" is true,
+/// the order of all tile-space loops is optimized after tiling. If
+/// "unrollPointLoops" is true, all intra-tile loops (also called point loops)
+/// are fully unrolled after tiling.
+bool applyLoopTiling(AffineLoopBand &band, TileList tileList,
+                     bool tileOrderOpt = true, bool unrollPointLoops = true);
 
 bool applyLegalizeToHLSCpp(FuncOp func, bool topFunc);
 
@@ -76,22 +78,24 @@
 bool applyLoopPipelining(AffineLoopBand &band, unsigned pipelineLoc,
                          unsigned targetII);
 
+/// Apply simplification optimizations.
+bool applySimplificationOpts(FuncOp func);
+
-/// Fully unroll all loops insides of a loop block.
-bool applyFullyLoopUnrolling(Block &block);
-
-bool applyFullyUnrollAndPartition(Block &block, FuncOp func);
-
-bool applyMemoryAccessOpt(FuncOp func);
+/// Fully unroll all loops inside a loop block.
+bool applyFullyLoopUnrolling(Block &block, unsigned maxIterNum = 10);
 
+/// Apply the specified array partition factors and kinds.
 bool applyArrayPartition(Value array, ArrayRef<unsigned> factors,
                         ArrayRef<hlscpp::PartitionKind> kinds,
                         bool updateFuncSignature = true);
 
+/// Find the suitable array partition factors and kinds for all arrays in the
+/// targeted function.
 bool applyAutoArrayPartition(FuncOp func);
 
-/// Apply optimization strategy to a loop band. The ancestor function is
-/// also passed in because the post-tiling optimizations have to take
-/// function as target, e.g. canonicalizer and array partition.
+/// Apply optimization strategy to a loop band. The ancestor function is also
+/// passed in because the post-tiling optimizations have to take function as
+/// target, e.g. canonicalizer and array partition.
 bool applyOptStrategy(AffineLoopBand &band, FuncOp func, TileList tileList,
                       unsigned targetII);
diff --git a/lib/Bindings/Python/ScaleHLSModule.cpp b/lib/Bindings/Python/ScaleHLSModule.cpp
index 20f77a8..ae8f265 100644
--- a/lib/Bindings/Python/ScaleHLSModule.cpp
+++ b/lib/Bindings/Python/ScaleHLSModule.cpp
@@ -171,13 +171,11 @@ static bool loopVarBoundRemoval(PyAffineLoopBand band) {
 
-/// If succeeded, return the location of the innermost tile-space loop.
-/// Otherwise, return -1.
+/// Return true if the tiling is applied successfully, otherwise false.
-static int64_t loopTiling(PyAffineLoopBand band, py::object factorsObject,
-                          bool simplify) {
+static bool loopTiling(PyAffineLoopBand band, py::object factorsObject,
+                       bool tileOrderOpt, bool unrollPointLoops) {
   py::gil_scoped_release();
   llvm::SmallVector<unsigned> factors;
   getVectorFromUnsignedNpArray(factorsObject.ptr(), factors);
-  auto loc = applyLoopTiling(band.get(), factors, simplify);
-  return loc.hasValue() ? loc.getValue() : -1;
+  return applyLoopTiling(band.get(), factors, tileOrderOpt, unrollPointLoops);
 }
 
 static bool loopPipelining(PyAffineLoopBand band, int64_t pipelineLoc,
@@ -200,12 +199,12 @@ static bool legalizeToHLSCpp(MlirOperation op, bool topFunc) {
   return applyLegalizeToHLSCpp(func, topFunc);
 }
 
-static bool memoryAccessOpt(MlirOperation op) {
+static bool simplificationOpts(MlirOperation op) {
   py::gil_scoped_release();
   auto func = dyn_cast<FuncOp>(unwrap(op));
   if (!func)
     throw SetPyError(PyExc_ValueError, "targeted operation not a function");
-  return applyMemoryAccessOpt(func);
+  return applySimplificationOpts(func);
 }
 
 static bool autoArrayPartition(MlirOperation op) {
@@ -273,7 +272,7 @@ PYBIND11_MODULE(_scalehls, m) {
 
   // Function transform APIs.
   m.def("legalize_to_hlscpp", &legalizeToHLSCpp);
-  m.def("memory_access_opt", &memoryAccessOpt);
+  m.def("memory_access_opt", &simplificationOpts);
   m.def("auto_array_partition", &autoArrayPartition);
 
   // Array transform APIs.
diff --git a/lib/Transforms/Directive/ArrayPartition.cpp b/lib/Transforms/Directive/ArrayPartition.cpp
index fb7a358..5a38bfe 100644
--- a/lib/Transforms/Directive/ArrayPartition.cpp
+++ b/lib/Transforms/Directive/ArrayPartition.cpp
@@ -45,6 +45,7 @@ static void updateSubFuncs(FuncOp func, Builder builder) {
   });
 }
 
+/// Apply the specified array partition factors and kinds.
 bool scalehls::applyArrayPartition(Value array, ArrayRef<unsigned> factors,
                                    ArrayRef<hlscpp::PartitionKind> kinds,
                                    bool updateFuncSignature) {
@@ -203,6 +204,8 @@ getDimAccessMaps(Operation *op, AffineValueMap valueMap, int64_t dim) {
   return maps;
 }
 
+/// Find the suitable array partition factors and kinds for all arrays in the
+/// targeted function.
 bool scalehls::applyAutoArrayPartition(FuncOp func) {
   // Check whether the input function is pipelined.
   bool funcPipeline = false;
diff --git a/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp b/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp
index fc41ad7..e320a76 100644
--- a/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp
+++ b/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp
@@ -5,10 +5,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
-#include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Utils.h"
-#include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "scalehls/Transforms/Passes.h"
 #include "scalehls/Transforms/Utils.h"
@@ -16,109 +14,42 @@
 using namespace mlir;
 using namespace scalehls;
 
-static IntegerSet simplify(IntegerSet set) { return simplifyIntegerSet(set); }
-
-/// Performs basic affine map simplifications.
-static AffineMap simplify(AffineMap map) {
-  MutableAffineMap mMap(map);
-  mMap.simplify();
-  return mMap.getAffineMap();
-}
-
-/// Utility to simplify an affine attribute and update its entry in the parent
-/// operation if necessary.
-template <typename AttrT>
-static void
-simplifyAndUpdateAttr(Operation *op, StringAttr name, AttrT attr,
-                      DenseMap<Attribute, Attribute> &simplifiedAttrs) {
-  auto &simplified = simplifiedAttrs[attr];
-  if (simplified == attr)
-    return;
-
-  // This is a newly encountered attribute.
-  if (!simplified) {
-    // Try to simplify the value of the attribute.
-    auto value = attr.getValue();
-    auto simplifiedValue = simplify(value);
-    if (simplifiedValue == value) {
-      simplified = attr;
-      return;
-    }
-    simplified = AttrT::get(simplifiedValue);
-  }
-
-  // Simplification was successful, so update the attribute.
-  op->setAttr(name, simplified);
-}
-
-static void simplifyAffineStructures(Block &block) {
-  auto context = block.front().getContext();
-  DenseMap<Attribute, Attribute> simplifiedAttrs;
-
-  RewritePatternSet patterns(context);
-  AffineApplyOp::getCanonicalizationPatterns(patterns, context);
-  AffineForOp::getCanonicalizationPatterns(patterns, context);
-  AffineIfOp::getCanonicalizationPatterns(patterns, context);
-  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
-
-  // The simplification of affine attributes will likely simplify the op. Try to
-  // fold/apply canonicalization patterns when we have affine dialect ops.
-  SmallVector<Operation *> opsToSimplify;
-  block.walk([&](Operation *op) {
-    for (auto attr : op->getAttrs()) {
-      if (auto mapAttr = attr.getValue().dyn_cast<AffineMapAttr>())
-        simplifyAndUpdateAttr(op, attr.getName(), mapAttr, simplifiedAttrs);
-      else if (auto setAttr = attr.getValue().dyn_cast<IntegerSetAttr>())
-        simplifyAndUpdateAttr(op, attr.getName(), setAttr, simplifiedAttrs);
-    }
-
-    if (isa<AffineForOp, AffineIfOp, AffineApplyOp>(op))
-      opsToSimplify.push_back(op);
-  });
-  applyOpPatternsAndFold(opsToSimplify, frozenPatterns, /*strict=*/true);
-}
-
 /// Apply loop tiling to the input loop band and sink all intra-tile loops to
-/// the innermost loop with the original loop order. Return the location of the
-/// innermost tile-space loop.
-Optional<unsigned> scalehls::applyLoopTiling(AffineLoopBand &band,
-                                             TileList tileList, bool simplify) {
+/// the innermost loop with the original loop order. If "tileOrderOpt" is true,
+/// the order of all tile-space loops is optimized after tiling. If
+/// "unrollPointLoops" is true, all intra-tile loops (also called point loops)
+/// are fully unrolled after tiling.
+bool scalehls::applyLoopTiling(AffineLoopBand &band, TileList tileList,
+                               bool tileOrderOpt, bool unrollPointLoops) {
   assert(!band.empty() && "no loops provided");
 
   if (!isPerfectlyNested(band))
-    return Optional<unsigned>();
+    return false;
 
-  // Loop tiling.
+  // Record the original band size and loop attributes for later use.
   auto originalBandSize = band.size();
+  SmallVector<LoopDirectiveAttr> bandAttrs;
+  for (auto loop : band)
+    bandAttrs.push_back(getLoopDirective(loop));
+
+  // Apply loop tiling.
   AffineLoopBand tiledBand;
   if (failed(tilePerfectlyNested(band, tileList, &tiledBand)))
-    return Optional<unsigned>();
+    return false;
 
-  // Simplify the tiled loop band if required.
-  if (simplify) {
-    band.clear();
-    unsigned simplifiedBandSize = 0;
-    for (unsigned i = 0, e = tiledBand.size(); i < e; ++i) {
-      auto loop = tiledBand[i];
-
-      Optional<uint64_t> tripCount = getConstantTripCount(loop);
-      if (i < originalBandSize - 1 || simplifiedBandSize > 0 || !tripCount ||
-          tripCount.getValue() != 1)
-        (void)normalizeAffineFor(loop);
-
-      if (loop && !loop.getLoopBody().empty()) {
-        band.push_back(loop);
-        if (i < originalBandSize)
-          ++simplifiedBandSize;
-      }
-    }
-    simplifyAffineStructures(*band.front().getBody());
-    return simplifiedBandSize - 1;
-  }
-
-  // Otherwise, directly return the tiled loop band.
+  // Get all tile-space loops and reannotate their attributes.
   band = tiledBand;
-  return originalBandSize - 1;
+  band.resize(originalBandSize);
+  for (auto zip : llvm::zip(band, bandAttrs))
+    if (std::get<1>(zip))
+      setLoopDirective(std::get<0>(zip), std::get<1>(zip));
+
+  // Apply loop order optimization and point-loop unrolling if required.
+  if (tileOrderOpt)
+    applyAffineLoopOrderOpt(band);
+  if (unrollPointLoops)
+    applyFullyLoopUnrolling(*band.back().getBody());
+  return true;
 }
 
 namespace {
 struct AffineLoopUnrollAndPipeline
@@ -159,13 +90,9 @@
       sizes.push_back(1);
     }
 
-    // Apply loop tiling and extract the tile loops if applicable.
-    if (auto tileLoc = applyLoopTiling(band, sizes))
-      band.resize(tileLoc.getValue() + 1);
-
-    // Apply loop order optimization and pipelining.
-    if (loopOrderOpt)
-      applyAffineLoopOrderOpt(band);
+    // Apply loop unrolling and pipelining.
+    applyLoopTiling(band, sizes, /*tileOrderOpt=*/loopOrderOpt.getValue(),
+                    /*unrollPointLoops=*/true);
     applyLoopPipelining(band, band.size() - 1, (unsigned)1);
   }
 }
diff --git a/lib/Transforms/MultipleLevelDSE.cpp b/lib/Transforms/MultipleLevelDSE.cpp
index 29db2d3..aa35dd6 100644
--- a/lib/Transforms/MultipleLevelDSE.cpp
+++ b/lib/Transforms/MultipleLevelDSE.cpp
@@ -676,7 +676,9 @@ bool ScaleHLSOptimizer::simplifyLoopNests(FuncOp func) {
     // unrolling to it.
     tmpFunc.walk([&](AffineForOp loop) {
       if (loop->getAttrOfType<BoolAttr>("opt_flag")) {
-        applyFullyUnrollAndPartition(*loop.getBody(), tmpFunc);
+        applyFullyLoopUnrolling(*loop.getBody());
+        applySimplificationOpts(tmpFunc);
+        applyAutoArrayPartition(tmpFunc);
         return;
       }
     });
@@ -685,9 +687,11 @@ bool ScaleHLSOptimizer::simplifyLoopNests(FuncOp func) {
     estimator.estimateFunc(tmpFunc);
 
     // Fully unroll the candidate loop or delve into child loops.
-    if (getResource(tmpFunc).getDsp() <= maxDspNum)
-      applyFullyUnrollAndPartition(*candidate.getBody(), func);
-    else {
+    if (getResource(tmpFunc).getDsp() <= maxDspNum) {
+      applyFullyLoopUnrolling(*candidate.getBody());
+      applySimplificationOpts(func);
+      applyAutoArrayPartition(func);
+    } else {
       auto childForOps = candidate.getOps<AffineForOp>();
       targetLoops.append(childForOps.begin(), childForOps.end());
     }
diff --git a/lib/Transforms/Passes.cpp b/lib/Transforms/Passes.cpp
index 6be12de..663d5fb 100644
--- a/lib/Transforms/Passes.cpp
+++ b/lib/Transforms/Passes.cpp
@@ -83,7 +83,9 @@ void scalehls::registerScaleHLSPyTorchPipeline() {
           scalehls::createAffineLoopUnrollAndPipelinePass(loopUnrollSize));
     }
 
-    // Memory accessing simplifications.
+    // Apply simplifications.
+    pm.addPass(mlir::createAffineLoopNormalizePass());
+    pm.addPass(mlir::createSimplifyAffineStructuresPass());
     pm.addPass(mlir::createCanonicalizerPass());
     pm.addPass(scalehls::createSimplifyAffineIfPass());
     pm.addPass(scalehls::createAffineStoreForwardPass());
diff --git a/lib/Transforms/Utils.cpp b/lib/Transforms/Utils.cpp
index 119cedb..5674c42 100644
--- a/lib/Transforms/Utils.cpp
+++ b/lib/Transforms/Utils.cpp
@@ -6,6 +6,7 @@
 
 #include "scalehls/Transforms/Utils.h"
 #include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/Passes.h"
 #include "scalehls/Transforms/Passes.h"
@@ -84,10 +85,36 @@ void scalehls::setFuncDirective(Operation *op, bool pipeline,
 // Loop transform utils
 //===----------------------------------------------------------------------===//
 
+static void addSimplificationPipeline(PassManager &pm) {
+  // To factor out the redundant affine operations.
+  pm.addPass(createAffineLoopNormalizePass());
+  pm.addPass(createSimplifyAffineStructuresPass());
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(createSimplifyAffineIfPass());
+
+  // To simplify memory accesses. Note that the store forwarding is
+  // non-trivial and has a worst-case complexity of O(n^2).
+  pm.addPass(createAffineStoreForwardPass());
+  pm.addPass(createSimplifyMemrefAccessPass());
+
+  // Generic common sub-expression elimination and initial interval reduction.
+  pm.addPass(createCSEPass());
+  pm.addPass(createReduceInitialIntervalPass());
+}
+
+/// Apply simplification optimizations.
+bool scalehls::applySimplificationOpts(FuncOp func) {
+  // Apply general optimizations.
+  PassManager optPM(func.getContext(), "builtin.func");
+  addSimplificationPipeline(optPM);
+  if (failed(optPM.run(func)))
+    return false;
+  return true;
+}
+
-/// Fully unroll all loops insides of a block.
-bool scalehls::applyFullyLoopUnrolling(Block &block) {
-  // Try 8 iterations before exiting.
-  for (auto i = 0; i < 8; ++i) {
+/// Fully unroll all loops inside a block.
+bool scalehls::applyFullyLoopUnrolling(Block &block, unsigned maxIterNum) {
+  for (unsigned i = 0; i < maxIterNum; ++i) {
     bool hasFullyUnrolled = true;
     block.walk([&](AffineForOp loop) {
       if (failed(loopUnrollFull(loop)))
@@ -103,46 +130,6 @@
   return true;
 }
 
-static void addPassPipeline(PassManager &pm) {
-  // To factor out the redundant AffineApply/AffineIf operations.
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createSimplifyAffineIfPass());
-
-  // To simplify the memory accessing. Note that the store forwarding is
-  // non-trivial and has a worst case complexity of O(n^2).
-  pm.addPass(createAffineStoreForwardPass());
-  pm.addPass(createSimplifyMemrefAccessPass());
-
-  // Generic common sub expression elimination.
-  pm.addPass(createCSEPass());
-  pm.addPass(createReduceInitialIntervalPass());
-}
-
-bool scalehls::applyMemoryAccessOpt(FuncOp func) {
-  // Apply general optimizations.
-  PassManager optPM(func.getContext(), "builtin.func");
-  addPassPipeline(optPM);
-  if (failed(optPM.run(func)))
-    return false;
-
-  return true;
-}
-
-bool scalehls::applyFullyUnrollAndPartition(Block &block, FuncOp func) {
-  applyFullyLoopUnrolling(block);
-
-  // Apply general optimizations.
-  PassManager optPM(func.getContext(), "builtin.func");
-  addPassPipeline(optPM);
-  if (failed(optPM.run(func)))
-    return false;
-
-  // Apply the best suitable array partition strategy to the function.
-  applyAutoArrayPartition(func);
-
-  return true;
-}
-
 /// Apply optimization strategy to a loop band. The ancestor function is also
 /// passed in because the post-tiling optimizations have to take function as
 /// target, e.g. canonicalizer and array partition.
@@ -152,27 +139,22 @@ bool scalehls::applyOptStrategy(AffineLoopBand &band, FuncOp func,
                                 TileList tileList, unsigned targetII) {
   if (!func->isProperAncestor(band.front()))
     return false;
 
-  // Apply loop tiling.
-  auto pipelineLoopLoc = applyLoopTiling(band, tileList);
-  if (!pipelineLoopLoc)
-    return false;
-
   // Apply LegalizeToHLSCpp conversion.
   applyLegalizeToHLSCpp(func, /*isTopFunc=*/true);
 
+  // Apply loop tiling.
+  if (!applyLoopTiling(band, tileList, /*tileOrderOpt=*/false,
+                       /*unrollPointLoops=*/true))
+    return false;
+
   // Apply loop pipelining.
-  if (!applyLoopPipelining(band, pipelineLoopLoc.getValue(), targetII))
+  if (!applyLoopPipelining(band, band.size() - 1, targetII))
     return false;
 
-  // Apply generic optimizations.
-  PassManager optPM(func.getContext(), "builtin.func");
-  addPassPipeline(optPM);
-  if (failed(optPM.run(func)))
-    return false;
-
-  // Apply the best suitable array partition strategy to the function.
+  // Apply memory access optimizations and the most suitable array partition
+  // strategy to the function.
+  applySimplificationOpts(func);
   applyAutoArrayPartition(func);
-
   return true;
 }
 
@@ -181,32 +163,24 @@ bool scalehls::applyOptStrategy(FuncOp func, ArrayRef<TileList> tileLists,
                                 ArrayRef<unsigned> targetIIs) {
   AffineLoopBands bands;
   getLoopBands(func.front(), bands);
-
-  // Apply loop tiling and pipelining to all loop bands.
-  SmallVector<unsigned> pipelineLoopLocs;
-  for (unsigned i = 0, e = bands.size(); i < e; ++i) {
-    auto pipelineLoopLoc = applyLoopTiling(bands[i], tileLists[i]);
-    if (!pipelineLoopLoc)
-      return false;
-    pipelineLoopLocs.push_back(pipelineLoopLoc.getValue());
-  }
+  assert(bands.size() == tileLists.size() &&
+         bands.size() == targetIIs.size() &&
+         "unexpected size of tile lists or target IIs");
 
   // Apply LegalizeToHLSCpp conversion.
   applyLegalizeToHLSCpp(func, /*isTopFunc=*/true);
 
-  for (unsigned i = 0, e = bands.size(); i < e; ++i) {
-    if (!applyLoopPipelining(bands[i], pipelineLoopLocs[i], targetIIs[i]))
+  // Apply loop tiling to all loop bands.
+  for (unsigned i = 0, e = bands.size(); i < e; ++i)
+    if (!applyLoopTiling(bands[i], tileLists[i]))
       return false;
-  }
 
-  // Apply generic optimizations.
-  PassManager optPM(func.getContext(), "builtin.func");
-  addPassPipeline(optPM);
-  if (failed(optPM.run(func)))
-    return false;
+  for (unsigned i = 0, e = bands.size(); i < e; ++i)
+    if (!applyLoopPipelining(bands[i], bands[i].size() - 1, targetIIs[i]))
+      return false;
 
-  // Apply the best suitable array partition strategy to the function.
+  // Apply memory access optimizations and the most suitable array partition
+  // strategy to the function.
+  applySimplificationOpts(func);
   applyAutoArrayPartition(func);
-
   return true;
 }
diff --git a/test/Transforms/Loop/affine_loop_unroll_and_pipeline.mlir b/test/Transforms/Loop/affine_loop_unroll_and_pipeline.mlir
index 06524b5..f1e3c2c 100644
--- a/test/Transforms/Loop/affine_loop_unroll_and_pipeline.mlir
+++ b/test/Transforms/Loop/affine_loop_unroll_and_pipeline.mlir
@@ -1,22 +1,19 @@
 // RUN: scalehls-opt -affine-loop-unroll-and-pipeline="unroll-size=2 loop-order-opt=false" %s | FileCheck %s
 
-// CHECK: #map0 = affine_map<(d0, d1) -> (d0 + d1 * 2)>
-// CHECK: #map1 = affine_map<(d0) -> (d0 + 1)>
+// CHECK: #map = affine_map<(d0) -> (d0 + 1)>
 // CHECK: #set0 = affine_set<(d0, d1) : (d0 - d1 >= 0)>
 // CHECK: #set1 = affine_set<(d0) : (d0 == 0)>
 #set0 = affine_set<(d0, d1) : (d0 - d1 >= 0)>
 #set1 = affine_set<(d0) : (d0 == 0)>
 module {
   func @test_syrk(%arg0: f32, %arg1: f32, %arg2: memref<16x16xf32>, %arg3: memref<16x16xf32>) {
-    // CHECK: affine.for %arg4 = 0 to 8 {
+    // CHECK: affine.for %arg4 = 0 to 16 step 2 {
     // CHECK: affine.for %arg5 = 0 to 16 {
     // CHECK: affine.for %arg6 = 0 to 16 {
     // CHECK-NOT: affine.for %arg7 = 0 to 2 {
     affine.for %arg4 = 0 to 16 {
       affine.for %arg5 = 0 to 16 {
         affine.for %arg6 = 0 to 16 {
-          // CHECK: %0 = affine.apply #map0(%c0, %arg4)
-          // CHECK: affine.if #set0(%arg5, %arg6) {
           affine.if #set0(%arg5, %arg6) {
             %0 = affine.load %arg3[%arg5, %arg6] : memref<16x16xf32>
             %1 = arith.mulf %arg1, %0 : f32
@@ -31,9 +28,7 @@ module {
             %7 = arith.addf %6, %4 : f32
             affine.store %7, %arg3[%arg5, %arg6] : memref<16x16xf32>
           }
-          // CHECK: %1 = affine.apply #map1(%c0)
-          // CHECK: %2 = affine.apply #map0(%1, %arg4)
-          // CHECK: affine.if #set0(%arg5, %arg6) {
+          // CHECK: %0 = affine.apply #map(%arg4)
         }
       }
     }
diff --git a/tools/pyscalehls/pyscalehls.py b/tools/pyscalehls/pyscalehls.py
index ec6f302..9c08df1 100755
--- a/tools/pyscalehls/pyscalehls.py
+++ b/tools/pyscalehls/pyscalehls.py
@@ -71,10 +71,11 @@ def main():
         # Note: We use the trip count to generate this example "factors".
         factors = np.ones(band.depth, dtype=int)
-        factors[-1] = band.get_trip_count(band.depth - 1) / 4
-        loc = scalehls.loop_tiling(band, factors, True)  # simplify = True
+        factors[-1] = band.get_trip_count(band.depth - 1) // 4
+        # tileOrderOpt = False, unrollPointLoops = True
+        scalehls.loop_tiling(band, factors, False, True)
 
         # Apply loop pipelining. All loops inside of the pipelined loop are fully unrolled.
-        scalehls.loop_pipelining(band, loc, 3)  # targetII = 3
+        scalehls.loop_pipelining(band, band.depth - 1, 3)  # targetII = 3
 
         # Traverse all arrays in the function.
         arrays = scalehls.ArrayList(func)
@@ -93,7 +94,7 @@ def main():
     scalehls.legalize_to_hlscpp(
         func, func.sym_name.value == opts.function)
 
-    # Optimize memory accesses through store forwarding, etc.
+    # Apply simplifications.
    scalehls.memory_access_opt(func)

    # Apply suitable array partition strategies through analyzing the array access pattern.
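
--
Usage sketch (not part of this patch): with the refactored API, callers no
longer thread a returned tile-space location into the pipelining step. Once
applyLoopTiling succeeds, the band holds exactly the tile-space loops, so the
innermost one is simply the last element. The snippet below mirrors the
applyOptStrategy flow above; the helper name tileAndPipelineFunc and the tile
factor of 2 are illustrative assumptions, and TileList is assumed to be the
SmallVector-of-unsigned alias declared in Utils.h.

    #include "scalehls/Transforms/Utils.h"

    using namespace mlir;
    using namespace scalehls;

    // Hypothetical driver: legalize, tile, pipeline, then clean up.
    static bool tileAndPipelineFunc(FuncOp func) {
      // Collect all perfectly nested loop bands, as applyOptStrategy does.
      AffineLoopBands bands;
      getLoopBands(func.front(), bands);

      applyLegalizeToHLSCpp(func, /*isTopFunc=*/true);

      for (auto &band : bands) {
        // Tile every loop in the band by an illustrative factor of 2. On
        // success, "band" holds only the tile-space loops (with directive
        // attributes preserved) and all point loops are fully unrolled.
        TileList tileList(band.size(), 2);
        if (!applyLoopTiling(band, tileList, /*tileOrderOpt=*/true,
                             /*unrollPointLoops=*/true))
          return false;

        // Pipeline the innermost tile-space loop, i.e. position
        // band.size() - 1, exactly as the refactored callers above do.
        if (!applyLoopPipelining(band, band.size() - 1, /*targetII=*/1))
          return false;
      }

      // Simplify memory accesses and partition arrays afterwards.
      applySimplificationOpts(func);
      return applyAutoArrayPartition(func);
    }

Note that applyOptStrategy passes tileOrderOpt=false while the
unroll-and-pipeline pass forwards its loop-order-opt option; the sketch simply
enables the optimization for a standalone run.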