[StaticLogic] Add conversion pass for Affine loop nests. (#2157)

This is just the start of a pass, but it is enough to exercise the
scheduling tools and build a pipeline. At the moment, the pass doesn't
make use of the computed schedule; it only sets up a loop template.
That is just enough to lower a single loop and generate the appropriate
induction variable increments. Checkpointing here before moving any
further.

After this, I plan to add a helper class to this pass to answer the
questions we will need answered to flesh out createStaticLogicPipeline.

My goal is to take something as simple as this and start threading it
through the Calyx lowering to get a very thin path working before
adding support for more cases.
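
For reference, here is a minimal sketch of how the pass could be driven
from C++, assuming the usual MLIR PassManager APIs; in-tree it is
exercised via circt-opt -convert-affine-to-staticlogic (see the new
test), and the helper name below is purely illustrative:

  #include "circt/Conversion/AffineToStaticLogic.h"
  #include "mlir/IR/BuiltinOps.h"
  #include "mlir/Pass/PassManager.h"
  #include "mlir/Support/LogicalResult.h"

  // Lower affine loop nests in `module` to StaticLogic pipelines, mirroring
  // what `circt-opt -convert-affine-to-staticlogic` does on the command line.
  static mlir::LogicalResult lowerAffineToStaticLogic(mlir::ModuleOp module) {
    mlir::PassManager pm(module.getContext());
    // The conversion is a function pass, so nest it on the module's functions.
    pm.addNestedPass<mlir::FuncOp>(circt::createAffineToStaticLogic());
    return pm.run(module);
  }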

Co-authored-by: Julian Oppermann <oppermann@esa.tu-darmstadt.de>
mikeurbach 2021-11-12 10:28:27 -07:00 committed by GitHub
parent bd0e52efac
commit 0308cf325e
10 changed files with 336 additions and 1 deletion

@@ -0,0 +1,22 @@
//===- AffineToStaticLogic.h ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef CIRCT_CONVERSION_AFFINETOSTATICLOGIC_H_
#define CIRCT_CONVERSION_AFFINETOSTATICLOGIC_H_
#include <memory>
namespace mlir {
class Pass;
} // namespace mlir
namespace circt {
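/// Creates a pass that converts Affine loop nests into StaticLogic pipelines.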
std::unique_ptr<mlir::Pass> createAffineToStaticLogic();
} // namespace circt
#endif // CIRCT_CONVERSION_AFFINETOSTATICLOGIC_H_

@@ -13,6 +13,7 @@
#ifndef CIRCT_CONVERSION_PASSES_H
#define CIRCT_CONVERSION_PASSES_H
#include "circt/Conversion/AffineToStaticLogic.h"
#include "circt/Conversion/CalyxToHW.h"
#include "circt/Conversion/ExportVerilog.h"
#include "circt/Conversion/FIRRTLToHW.h"
@@ -25,6 +26,12 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"
namespace mlir {
namespace arith {
class ArithmeticDialect;
} // namespace arith
} // namespace mlir
namespace circt {
// Generate the code for registering conversion passes.

@@ -15,6 +15,24 @@
include "mlir/Pass/PassBase.td"
//===----------------------------------------------------------------------===//
// AffineToStaticLogic
//===----------------------------------------------------------------------===//
def AffineToStaticLogic : FunctionPass<"convert-affine-to-staticlogic"> {
let summary = "Convert Affine dialect to StaticLogic pipelines";
let description = [{
This pass analyzes Affine loops and control flow, creates a Scheduling
problem using the Calyx operator library, solves the problem, and lowers
the loops to a StaticLogic pipeline.
}];
let constructor = "circt::createAffineToStaticLogic()";
let dependentDialects = [
"circt::staticlogic::StaticLogicDialect",
"mlir::arith::ArithmeticDialect"
];
}
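// As a rough usage sketch (the input file name here is a placeholder), the
// pass can be exercised with:
//   circt-opt -convert-affine-to-staticlogic input.mlir
// For the test added in this commit, this replaces the affine.for loop with a
// staticlogic.pipeline.while op.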
//===----------------------------------------------------------------------===//
// ExportVerilog and ExportSplitVerilog
//===----------------------------------------------------------------------===//

@@ -32,7 +32,6 @@ static void checkMemrefDependence(SmallVectorImpl<Operation *> &memoryOps,
for (auto *destination : memoryOps) {
if (source == destination)
continue;
// Initialize the dependence list for this destination.
if (results.count(destination) == 0)
results[destination] = SmallVector<MemoryDependence>();

@@ -0,0 +1,245 @@
//===- AffineToStaticLogic.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "circt/Conversion/AffineToStaticLogic.h"
#include "../PassDetail.h"
#include "circt/Analysis/SchedulingAnalysis.h"
#include "circt/Dialect/StaticLogic/StaticLogic.h"
#include "circt/Scheduling/Algorithms.h"
#include "circt/Scheduling/Problems.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "affine-to-staticlogic"
using namespace mlir;
using namespace mlir::arith;
using namespace circt;
using namespace circt::analysis;
using namespace circt::scheduling;
using namespace circt::staticlogic;
namespace {
struct AffineToStaticLogic
: public AffineToStaticLogicBase<AffineToStaticLogic> {
void runOnFunction() override;
private:
LogicalResult populateOperatorTypes(SmallVectorImpl<AffineForOp> &loopNest);
LogicalResult solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest);
LogicalResult
createStaticLogicPipeline(SmallVectorImpl<AffineForOp> &loopNest);
CyclicSchedulingAnalysis *schedulingAnalysis;
};
} // namespace
void AffineToStaticLogic::runOnFunction() {
// Get scheduling analysis for the whole function.
schedulingAnalysis = &getAnalysis<CyclicSchedulingAnalysis>();
// Collect perfectly nested loops and work on them.
auto outerLoops = getOperation().getOps<AffineForOp>();
for (auto root : llvm::make_early_inc_range(outerLoops)) {
SmallVector<AffineForOp> nestedLoops;
getPerfectlyNestedLoops(nestedLoops, root);
// Restrict to single loops to simplify things for now.
if (nestedLoops.size() != 1)
continue;
// Populate the target operator types.
if (failed(populateOperatorTypes(nestedLoops)))
return signalPassFailure();
// Solve the scheduling problem computed by the analysis.
if (failed(solveSchedulingProblem(nestedLoops)))
return signalPassFailure();
// Convert the IR.
if (failed(createStaticLogicPipeline(nestedLoops)))
return signalPassFailure();
}
}
/// Populate the scheduling problem operator types for the dialect we are
/// targeting. Right now, we assume Calyx, which has a standard library with
/// well-defined operator latencies. Ultimately, we should move this to a
/// dialect interface in the Scheduling dialect.
LogicalResult AffineToStaticLogic::populateOperatorTypes(
SmallVectorImpl<AffineForOp> &loopNest) {
// Scheduling analysis only considers the innermost loop nest for now.
auto forOp = loopNest.back();
// Retrieve the cyclic scheduling problem for this loop.
CyclicProblem &problem = schedulingAnalysis->getProblem(forOp);
// Load the Calyx operator library into the problem. This is a very minimal
// set of arithmetic and memory operators for now. This should ultimately be
// pulled out into some sort of dialect interface.
Problem::OperatorType combOpr = problem.getOrInsertOperatorType("comb");
problem.setLatency(combOpr, 0);
Problem::OperatorType seqOpr = problem.getOrInsertOperatorType("seq");
problem.setLatency(seqOpr, 1);
Problem::OperatorType mcOpr = problem.getOrInsertOperatorType("multicycle");
problem.setLatency(mcOpr, 3);
Operation *unsupported;
WalkResult result = forOp.getBody()->walk([&](Operation *op) {
return TypeSwitch<Operation *, WalkResult>(op)
.Case<AddIOp, AffineIfOp, AffineYieldOp, mlir::ConstantOp, IndexCastOp,
memref::AllocaOp>([&](Operation *combOp) {
// Some known combinational ops.
problem.setLinkedOperatorType(combOp, combOpr);
return WalkResult::advance();
})
.Case<AffineReadOpInterface, AffineWriteOpInterface>(
[&](Operation *seqOp) {
// Some known sequential ops. In certain cases, reads may be
// combinational in Calyx, but taking advantage of that is left as
// a future enhancement.
problem.setLinkedOperatorType(seqOp, seqOpr);
return WalkResult::advance();
})
.Case<MulIOp>([&](Operation *mcOp) {
// Some known multi-cycle ops.
problem.setLinkedOperatorType(mcOp, mcOpr);
return WalkResult::advance();
})
.Default([&](Operation *badOp) {
unsupported = op;
return WalkResult::interrupt();
});
});
if (result.wasInterrupted())
return forOp.emitError("unsupported operation ") << *unsupported;
return success();
}
/// Solve the pre-computed scheduling problem.
LogicalResult AffineToStaticLogic::solveSchedulingProblem(
SmallVectorImpl<AffineForOp> &loopNest) {
// Scheduling analysis only considers the innermost loop nest for now.
auto forOp = loopNest.back();
// Retrieve the cyclic scheduling problem for this loop.
CyclicProblem &problem = schedulingAnalysis->getProblem(forOp);
// Optionally debug problem inputs.
LLVM_DEBUG(forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
llvm::dbgs() << "Scheduling inputs for " << *op;
auto opr = problem.getLinkedOperatorType(op);
llvm::dbgs() << "\n opr = " << opr;
llvm::dbgs() << "\n latency = " << problem.getLatency(*opr);
for (auto dep : problem.getDependences(op))
if (dep.isAuxiliary())
llvm::dbgs() << "\n dep = { distance = " << problem.getDistance(dep)
<< ", source = " << *dep.getSource() << " }";
llvm::dbgs() << "\n\n";
}));
// Verify and solve the problem.
if (failed(problem.check()))
return failure();
auto *anchor = forOp.getBody()->getTerminator();
if (failed(scheduleSimplex(problem, anchor)))
return failure();
// Optionally debug problem outputs.
LLVM_DEBUG({
llvm::dbgs() << "Scheduled initiation interval = "
<< problem.getInitiationInterval() << "\n\n";
forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
llvm::dbgs() << "Scheduling outputs for " << *op;
llvm::dbgs() << "\n start = " << problem.getStartTime(op);
llvm::dbgs() << "\n\n";
});
});
return success();
}
/// Create the pipeline op for a loop nest.
LogicalResult AffineToStaticLogic::createStaticLogicPipeline(
SmallVectorImpl<AffineForOp> &loopNest) {
auto outerLoop = loopNest.front();
auto innerLoop = loopNest.back();
ImplicitLocOpBuilder builder(outerLoop.getLoc(), outerLoop);
// Create constants for the loop's lower and upper bounds.
int64_t lbValue = innerLoop.getConstantLowerBound();
auto lowerBound = builder.create<arith::ConstantOp>(
IntegerAttr::get(builder.getI64Type(), lbValue));
int64_t ubValue = innerLoop.getConstantUpperBound();
auto upperBound = builder.create<arith::ConstantOp>(
IntegerAttr::get(builder.getI64Type(), ubValue));
int64_t stepValue = innerLoop.getStep();
auto step = builder.create<arith::ConstantOp>(
IntegerAttr::get(builder.getI64Type(), stepValue));
// Create the pipeline op, with the same result types as the inner loop. An
// iter arg is created for the induction variable.
TypeRange resultTypes = innerLoop.getResultTypes();
SmallVector<Value> iterArgs;
iterArgs.push_back(lowerBound);
iterArgs.append(innerLoop.getIterOperands().begin(),
innerLoop.getIterOperands().end());
auto pipeline = builder.create<PipelineWhileOp>(resultTypes, iterArgs);
// Create the condition, which currently just compares the induction variable
// to the upper bound.
Block &condBlock = pipeline.getCondBlock();
builder.setInsertionPointToStart(&condBlock);
auto cmpResult = builder.create<arith::CmpIOp>(
builder.getI1Type(), arith::CmpIPredicate::ult, condBlock.getArgument(0),
upperBound);
condBlock.getTerminator()->insertOperands(0, {cmpResult});
// Create the first stage.
Block &stagesBlock = pipeline.getStagesBlock();
builder.setInsertionPointToStart(&stagesBlock);
auto stage = builder.create<PipelineStageOp>(lowerBound.getType());
auto &stageBlock = stage.getBodyBlock();
builder.setInsertionPointToStart(&stageBlock);
// Add the induction variable increment to the first stage.
auto incResult =
builder.create<arith::AddIOp>(stagesBlock.getArgument(0), step);
stageBlock.getTerminator()->insertOperands(0, {incResult});
// Add the induction variable result to the terminator iter args.
auto stagesTerminator =
cast<PipelineTerminatorOp>(stagesBlock.getTerminator());
stagesTerminator.iter_argsMutable().append({stage.getResult(0)});
// Remove the loop nest from the IR.
for (auto loop : llvm::reverse(loopNest))
loop.erase();
return success();
}
std::unique_ptr<mlir::Pass> circt::createAffineToStaticLogic() {
return std::make_unique<AffineToStaticLogic>();
}

@@ -0,0 +1,9 @@
add_circt_library(CIRCTAffineToStaticLogic
AffineToStaticLogic.cpp
LINK_LIBS PUBLIC
MLIRPass
CIRCTScheduling
CIRCTSchedulingAnalysis
CIRCTStaticLogicOps
)

@@ -1,3 +1,4 @@
add_subdirectory(AffineToStaticLogic)
add_subdirectory(CalyxToHW)
add_subdirectory(ExportVerilog)
add_subdirectory(FIRRTLToHW)

@@ -13,6 +13,10 @@
#include "mlir/Pass/Pass.h"
namespace mlir {
namespace arith {
class ArithmeticDialect;
} // namespace arith
namespace scf {
class SCFDialect;
} // namespace scf

@@ -0,0 +1,29 @@
// RUN: circt-opt -convert-affine-to-staticlogic %s | FileCheck %s
// CHECK-LABEL: func @minimal
func @minimal() {
// Set up constants.
// CHECK: %[[LB:.+]] = arith.constant 0 : [[ITER_TYPE:.+]]
// CHECK: %[[UB:.+]] = arith.constant 10 : [[ITER_TYPE]]
// CHECK: %[[STEP:.+]] = arith.constant 1 : [[ITER_TYPE]]
// Pipeline header.
// CHECK: staticlogic.pipeline.while iter_args(%[[ITER_ARG:.+]] = %[[LB]]) : ([[ITER_TYPE]]) -> ()
// Condition block.
// CHECK: %[[COND_RESULT:.+]] = arith.cmpi ult, %[[ITER_ARG]]
// CHECK: staticlogic.pipeline.register %[[COND_RESULT]]
// First stage.
// CHECK: %[[STAGE0:.+]] = staticlogic.pipeline.stage
// CHECK: %[[ITER_INC:.+]] = arith.addi %[[ITER_ARG]], %[[STEP]]
// CHECK: staticlogic.pipeline.register %[[ITER_INC]]
// Pipeline terminator.
// CHECK: staticlogic.pipeline.terminator iter_args(%[[STAGE0]]), results()
affine.for %arg1 = 0 to 10 {
}
return
}

@@ -8,6 +8,7 @@ add_llvm_tool(circt-opt
llvm_update_compile_flags(circt-opt)
target_link_libraries(circt-opt
PRIVATE
CIRCTAffineToStaticLogic
CIRCTAnalysisTestPasses
CIRCTCalyx
CIRCTCalyxToHW