diff --git a/include/Transforms/Passes.h b/include/Transforms/Passes.h index e4a7dcc..ccae218 100644 --- a/include/Transforms/Passes.h +++ b/include/Transforms/Passes.h @@ -27,6 +27,10 @@ bool applyAffineLoopPerfection(AffineForOp loop, OpBuilder &builder); /// Apply remove variable bound to all inner loops of the input loop. bool applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder); +/// Apply loop pipelining to the input loop, all inner loops are automatically +/// fully unrolled. +bool applyLoopPipelining(AffineForOp loop, OpBuilder &builder); + //===----------------------------------------------------------------------===// // Optimization Pass Entries //===----------------------------------------------------------------------===// diff --git a/include/Transforms/Passes.td b/include/Transforms/Passes.td index a75383a..b7845ab 100644 --- a/include/Transforms/Passes.td +++ b/include/Transforms/Passes.td @@ -32,7 +32,7 @@ def LoopPipelining : Pass<"loop-pipelining", "FuncOp"> { let constructor = "mlir::scalehls::createLoopPipeliningPass()"; let options = [ - Option<"pipelineLevel", "pipeline-level", "unsigned", /*default=*/"1", + Option<"pipelineLevel", "pipeline-level", "unsigned", /*default=*/"0", "Positive number: loop level to be pipelined (from innermost)"> ]; } diff --git a/lib/Transforms/AffineLoopPerfection.cpp b/lib/Transforms/AffineLoopPerfection.cpp index e172e8e..ade49c4 100644 --- a/lib/Transforms/AffineLoopPerfection.cpp +++ b/lib/Transforms/AffineLoopPerfection.cpp @@ -46,7 +46,7 @@ bool scalehls::applyAffineLoopPerfection(AffineForOp innermostLoop, while (true) { // Get the parent loop of the child loop. auto childLoop = loops.back(); - auto loop = dyn_cast(childLoop.getParentOp()); + auto loop = childLoop.getParentOfType(); // Break the procedure if the parent operation is no longer a loop. 
if (!loop) diff --git a/lib/Transforms/AffineStoreForward.cpp b/lib/Transforms/AffineStoreForward.cpp index 2eca6fa..e1c9306 100644 --- a/lib/Transforms/AffineStoreForward.cpp +++ b/lib/Transforms/AffineStoreForward.cpp @@ -16,6 +16,9 @@ using namespace mlir; using namespace scalehls; +// The difference between this pass and the built-in memref-dataflow-opt is that +// this pass supports forwarding StoreOps that are conditionally executed. + namespace { // The store to load forwarding relies on three conditions: // diff --git a/lib/Transforms/ArrayPartition.cpp b/lib/Transforms/ArrayPartition.cpp index 4ff908e..31cd0e0 100644 --- a/lib/Transforms/ArrayPartition.cpp +++ b/lib/Transforms/ArrayPartition.cpp @@ -137,6 +137,9 @@ void ArrayPartition::runOnOperation() { // TODO: how to decide which to pick? applyArrayPartition(loadMap, builder); applyArrayPartition(storeMap, builder); + + // TODO: how to handle the case when different sub-functions have + // different array partition strategy selected? } } diff --git a/lib/Transforms/LoopPipelining.cpp b/lib/Transforms/LoopPipelining.cpp index 1211f75..37e918b 100644 --- a/lib/Transforms/LoopPipelining.cpp +++ b/lib/Transforms/LoopPipelining.cpp @@ -2,6 +2,7 @@ // //===----------------------------------------------------------------------===// +#include "Analysis/Utils.h" #include "Transforms/Passes.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -13,60 +14,80 @@ using namespace scalehls; namespace { struct LoopPipelining : public LoopPipeliningBase { - void runOnOperation() override; + void runOnOperation() override { + auto func = getOperation(); + auto builder = OpBuilder(func); + + // Walk through all loops. + for (auto forOp : func.getOps()) { + // Collect all innermost loops. 
+ SmallVector innermostLoops; + forOp.walk([&](AffineForOp loop) { + if (getChildLoopNum(loop) == 0) + innermostLoops.push_back(loop); + }); + + // Apply loop pipelining to the corresponding level of each innermost loop. + for (auto loop : innermostLoops) { + auto currentLoop = loop; + unsigned loopLevel = 0; + while (true) { + auto parentLoop = currentLoop.getParentOfType(); + + // If the outermost loop or the target level is reached, pipeline the current loop. + if (!parentLoop || pipelineLevel == loopLevel) { + applyLoopPipelining(currentLoop, builder); + break; + } + + // Move to the next loop level. + currentLoop = parentLoop; + ++loopLevel; + } + } + } + + // Canonicalize the IR after loop pipelining. + OwningRewritePatternList patterns; + for (auto *op : builder.getContext()->getRegisteredOperations()) + op->getCanonicalizationPatterns(patterns, builder.getContext()); + + applyPatternsAndFoldGreedily(func.getRegion(), std::move(patterns)); + } }; } // namespace -void LoopPipelining::runOnOperation() { - auto func = getOperation(); - auto builder = OpBuilder(func); +/// Apply loop pipelining to the input loop, all inner loops are automatically +/// fully unrolled. +bool scalehls::applyLoopPipelining(AffineForOp targetLoop, OpBuilder &builder) { + targetLoop.setAttr("pipeline", builder.getBoolAttr(true)); - // Walk through loops in the function. - for (auto forOp : func.getOps()) { - // TODO: support more fine-grained pipeline insertion configuration. - SmallVector nestedLoops; - forOp.walk([&](mlir::AffineForOp loop) { nestedLoops.push_back(loop); }); + // All inner loops of the pipelined loop are automatically unrolled. 
+ targetLoop.walk([&](AffineForOp loop) { + if (loop != targetLoop) + loopUnrollFull(loop); + }); - auto targetLoop = nestedLoops.back(); - if (nestedLoops.size() > pipelineLevel) - targetLoop = *std::next(nestedLoops.begin(), pipelineLevel); - - targetLoop.setAttr("pipeline", builder.getBoolAttr(true)); - - // All inner loops of the pipelined loop are automatically unrolled. - targetLoop.walk([&](mlir::AffineForOp loop) { - if (loop != targetLoop) - loopUnrollFull(loop); - }); - - // All outer loops that perfect nest the pipelined loop can be flattened. - SmallVector flattenedLoops; - flattenedLoops.push_back(targetLoop); - while (true) { - auto currentLoop = flattenedLoops.back(); - if (auto outerLoop = currentLoop.getParentOfType()) { - // Only if the current loop is the only child loop of the outer loop, - // the outer loop can be flattened into the current loop. - auto &body = outerLoop.getLoopBody().front(); - if (&body.front() == currentLoop && body.getOperations().size() == 2) { - flattenedLoops.push_back(outerLoop); - outerLoop.setAttr("flatten", builder.getBoolAttr("true")); - } else - break; + // All outer loops that perfectly nest the pipelined loop can be flattened. + SmallVector flattenedLoops; + flattenedLoops.push_back(targetLoop); + while (true) { + auto currentLoop = flattenedLoops.back(); + if (auto outerLoop = currentLoop.getParentOfType()) { + // Only if the current loop is the only child loop of the outer loop, the + // outer loop can be flattened into the current loop. + auto &body = outerLoop.getLoopBody().front(); + if (&body.front() == currentLoop && body.getOperations().size() == 2) { + flattenedLoops.push_back(outerLoop); + outerLoop.setAttr("flatten", builder.getBoolAttr(true)); } else break; - } + } else + break; } - // Canonicalize the IR after loop unrolling. 
- OwningRewritePatternList patterns; - - auto *context = &getContext(); - for (auto *op : context->getRegisteredOperations()) - op->getCanonicalizationPatterns(patterns, context); - - applyPatternsAndFoldGreedily(func.getOperation()->getRegions(), - std::move(patterns)); + // For now, this method always succeeds. + return true; } std::unique_ptr scalehls::createLoopPipeliningPass() {