[RemoveVarLoopBound] simplify impl logic; [QoREstimation] update to FuncOp pass, remove op-latency configuration

This commit is contained in:
Hanchen Ye 2020-12-13 22:07:04 -06:00
parent 142ffadd14
commit 31ce83be83
7 changed files with 60 additions and 104 deletions

View File

@ -1,23 +0,0 @@
[200MHz]
op=1
# define INT_ADD 0.5
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
# define IMULT 7.0 //actual 7.0
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
# define IDIV 36.0 //not chain
# define U_DIV 7.0 //actual 7.0 same with imul
# define UDIV 36.0
# define FP_ADD 8.0 //not chain
# define FP_MULT 5.0 //not chain
# define FP_DIV 16.0 //not chain
# define SI_TO_FP 6.0
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
# define SHIFT 0.2
# define ALLOCA_LATENCY 1.0
# define GEP_LATENCY 1.0
# define CAST_LATENCY 0.4
# define PHI_LATENCY 1.5
# define ICMP_LATENCY 0.5
# define FCMP_LATENCY 8.0//0.5
# define SELECT_LATENCY 0.2
# define CALL_LATENCY 1.0

View File

@ -1,2 +1,26 @@
[spec] [spec]
frequency=200MHz frequency=200MHz
[200MHz]
op=2333
# define INT_ADD 0.5
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
# define IMULT 7.0 //actual 7.0
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
# define IDIV 36.0 //not chain
# define U_DIV 7.0 //actual 7.0 same with imul
# define UDIV 36.0
# define FP_ADD 8.0 //not chain
# define FP_MULT 5.0 //not chain
# define FP_DIV 16.0 //not chain
# define SI_TO_FP 6.0
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
# define SHIFT 0.2
# define ALLOCA_LATENCY 1.0
# define GEP_LATENCY 1.0
# define CAST_LATENCY 0.4
# define PHI_LATENCY 1.5
# define ICMP_LATENCY 0.5
# define FCMP_LATENCY 8.0//0.5
# define SELECT_LATENCY 0.2
# define CALL_LATENCY 1.0

View File

@ -7,7 +7,7 @@
include "mlir/Pass/PassBase.td" include "mlir/Pass/PassBase.td"
def QoREstimation : Pass<"qor-estimation", "ModuleOp"> { def QoREstimation : Pass<"qor-estimation", "FuncOp"> {
let summary = "Estimate the performance and resource utilization"; let summary = "Estimate the performance and resource utilization";
let description = [{ let description = [{
This qor-estimation pass will analyze the input CDFG and pragma operations This qor-estimation pass will analyze the input CDFG and pragma operations
@ -21,9 +21,6 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
Option<"targetSpec", "target-spec", "std::string", Option<"targetSpec", "target-spec", "std::string",
/*default=*/"\"../config/target-spec.ini\"", /*default=*/"\"../config/target-spec.ini\"",
"File path: target backend specifications and configurations">, "File path: target backend specifications and configurations">,
Option<"opLatency", "op-latency", "std::string",
/*default=*/"\"../config/op-latency.ini\"",
"File path: profiling data for operation latency">
]; ];
} }

View File

@ -117,7 +117,6 @@ public:
void analyzeBlock(Block &block); void analyzeBlock(Block &block);
void analyzeFunc(FuncOp func); void analyzeFunc(FuncOp func);
void analyzeModule(ModuleOp module);
}; };
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -146,22 +145,10 @@ using MemPortDict = llvm::SmallDenseMap<Operation *, MemPort, 8>;
// For storing MemPort indexed by the pipeline stage (a basic block). // For storing MemPort indexed by the pipeline stage (a basic block).
using MemPortDictList = SmallVector<MemPortDict, 16>; using MemPortDictList = SmallVector<MemPortDict, 16>;
/// Record of loop induction information: a lower bound, an upper bound and a
/// step, each kept as an unsigned value.
struct InductionInfo {
  unsigned lowerBound;
  unsigned upperBound;
  unsigned step;

  /// Stores the three values exactly as given; no validation is performed.
  InductionInfo(unsigned lowerBound, unsigned upperBound, unsigned step)
      : lowerBound{lowerBound}, upperBound{upperBound}, step{step} {}
};
using InductionInfoList = SmallVector<InductionInfo, 8>;
class HLSCppEstimator : public HLSCppVisitorBase<HLSCppEstimator, bool>, class HLSCppEstimator : public HLSCppVisitorBase<HLSCppEstimator, bool>,
public HLSCppToolBase { public HLSCppToolBase {
public: public:
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath, explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath);
std::string opLatencyPath);
bool visitUnhandledOp(Operation *op) { return true; } bool visitUnhandledOp(Operation *op) { return true; }
@ -184,7 +171,6 @@ public:
void estimateOperation(Operation *op); void estimateOperation(Operation *op);
void estimateFunc(FuncOp func); void estimateFunc(FuncOp func);
void estimateBlock(Block &block); void estimateBlock(Block &block);
void estimateModule(ModuleOp module);
}; };
} // namespace scalehls } // namespace scalehls

View File

@ -5,6 +5,8 @@
#include "Analysis/QoREstimation.h" #include "Analysis/QoREstimation.h"
#include "Analysis/Passes.h" #include "Analysis/Passes.h"
#include "Dialect/HLSCpp/HLSCpp.h" #include "Dialect/HLSCpp/HLSCpp.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/Operation.h" #include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h" #include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@ -115,40 +117,23 @@ void HLSCppAnalyzer::analyzeFunc(FuncOp func) {
analyzeBlock(func.front()); analyzeBlock(func.front());
} }
/// Iterates over the operations held directly by `module`. Each FuncOp is
/// forwarded to analyzeFunc(); any other operation that is not the
/// ModuleTerminatorOp has an error emitted on it.
void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
  for (auto &topLevelOp : module) {
    auto funcOp = dyn_cast<FuncOp>(topLevelOp);
    if (funcOp) {
      analyzeFunc(funcOp);
      continue;
    }

    // The module terminator is expected here and is silently skipped.
    if (isa<ModuleTerminatorOp>(topLevelOp))
      continue;

    topLevelOp.emitError("is unsupported operation.");
  }
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// HLSCppEstimator Class Definition // HLSCppEstimator Class Definition
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// Estimator constructor. /// Estimator constructor.
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath, HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath)
string opLatencyPath)
: HLSCppToolBase(builder) { : HLSCppToolBase(builder) {
/*
INIReader targetSpec(targetSpecPath); INIReader targetSpec(targetSpecPath);
if (targetSpec.ParseError()) if (targetSpec.ParseError())
llvm::outs() << "error: target spec file parse fail, please refer to " llvm::outs() << "error: target spec file parse fail, please refer to "
"--help option and pass in correct file path\n"; "--help option and pass in correct file path\n";
INIReader opLatency(opLatencyPath);
if (opLatency.ParseError())
llvm::outs() << "error: Op latency file parse fail, please refer to "
"--help option and pass in correct file path\n";
// TODO: Support estimator initiation from profiling data. // TODO: Support estimator initiation from profiling data.
auto freq = targetSpec.Get("spec", "frequency", "200MHz"); auto freq = targetSpec.Get("spec", "frequency", "200MHz");
auto latency = opLatency.GetInteger(freq, "op", 0); auto latency = targetSpec.GetInteger(freq, "op", 0);
llvm::outs() << latency << "\n"; llvm::outs() << latency << "\n";
*/
} }
/// Calculate the partition index according to the affine map of a memory access /// Calculate the partition index according to the affine map of a memory access
@ -529,15 +514,6 @@ void HLSCppEstimator::estimateFunc(FuncOp func) {
setAttrValue(func, "latency", latency); setAttrValue(func, "latency", latency);
} }
/// Iterates over the operations held directly by `module`. Each FuncOp is
/// passed to estimateFunc(); the ModuleTerminatorOp is ignored; every other
/// operation has an error emitted on it.
void HLSCppEstimator::estimateModule(ModuleOp module) {
  for (auto &candidate : module) {
    auto asFunc = dyn_cast<FuncOp>(candidate);
    if (!asFunc) {
      // Not a function: only the module terminator is tolerated.
      if (!isa<ModuleTerminatorOp>(candidate))
        candidate.emitError("is unsupported operation.");
      continue;
    }
    estimateFunc(asFunc);
  }
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Entry of scalehls-opt // Entry of scalehls-opt
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -549,7 +525,7 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
// Extract all static parameters and current pragma configurations. // Extract all static parameters and current pragma configurations.
HLSCppAnalyzer analyzer(builder); HLSCppAnalyzer analyzer(builder);
analyzer.analyzeModule(getOperation()); analyzer.analyzeFunc(getOperation());
// Canonicalize the analyzed IR. // Canonicalize the analyzed IR.
OwningRewritePatternList patterns; OwningRewritePatternList patterns;
@ -562,8 +538,8 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns)); applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns));
// Estimate performance and resource utilization. // Estimate performance and resource utilization.
HLSCppEstimator estimator(builder, targetSpec, opLatency); HLSCppEstimator estimator(builder, targetSpec);
estimator.estimateModule(getOperation()); estimator.estimateFunc(getOperation());
} }
}; };
} // namespace } // namespace

View File

@ -41,38 +41,34 @@ void RemoveVarLoopBound::runOnOperation() {
// For now, only if the variable bound is the induction variable of // For now, only if the variable bound is the induction variable of
// one of the outer loops, the removal is possible. // one of the outer loops, the removal is possible.
unsigned idx = 0; unsigned idx = 0;
for (auto inductionVar : inductionVars) { if (auto valOwner = getForInductionVarOwner(val)) {
if (val == inductionVar) { if (valOwner.hasConstantUpperBound()) {
if (nestedLoops[idx].hasConstantUpperBound()) { // Set new constant loop bound.
// Set new constant loop bound. auto maximum = valOwner.getConstantUpperBound();
auto maximum = nestedLoops[idx].getConstantUpperBound(); loop.setConstantUpperBound(maximum);
loop.setConstantUpperBound(maximum);
// Collect all components for creating AffineIf operation. // Collect all components for creating AffineIf operation.
auto ifExpr = getAffineDimExpr(0, func.getContext()) - auto ifExpr = getAffineDimExpr(0, func.getContext()) -
getAffineDimExpr(1, func.getContext()) - getAffineDimExpr(1, func.getContext()) -
getAffineConstantExpr(1, func.getContext()); getAffineConstantExpr(1, func.getContext());
auto ifCondition = auto ifCondition = IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
// Create AffineIf operation in the front of the innermost // Create AffineIf operation in the front of the innermost
// perfect loop. // perfect loop.
builder.setInsertionPointToStart(nestedLoops.back().getBody()); builder.setInsertionPointToStart(nestedLoops.back().getBody());
auto ifOp = builder.create<mlir::AffineIfOp>( auto ifOp = builder.create<mlir::AffineIfOp>(
func.getLoc(), ifCondition, func.getLoc(), ifCondition,
ArrayRef<Value>({val, loop.getInductionVar()}), ArrayRef<Value>({val, loop.getInductionVar()}),
/*withElseRegion*/ false); /*withElseRegion*/ false);
// Move all operations in the innermost perfect loop into the // Move all operations in the innermost perfect loop into the
// new created AffineIf region. // new created AffineIf region.
auto &ifBlock = ifOp.getBody()->getOperations(); auto &ifBlock = ifOp.getBody()->getOperations();
auto &loopBlock = nestedLoops.back().getBody()->getOperations(); auto &loopBlock = nestedLoops.back().getBody()->getOperations();
ifBlock.splice(ifBlock.begin(), loopBlock, ifBlock.splice(ifBlock.begin(), loopBlock,
std::next(loopBlock.begin()), std::next(loopBlock.begin()),
std::prev(loopBlock.end(), 1)); std::prev(loopBlock.end(), 1));
}
} }
idx += 1;
} }
} }
inductionVars.push_back(loop.getInductionVar()); inductionVars.push_back(loop.getInductionVar());

View File

@ -10,9 +10,9 @@ func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) attribu
%0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex> %0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex>
%1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex> %1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex>
%2 = addi %0, %1 : index %2 = addi %0, %1 : index
affine.store %2, %array1[%i, %j, %k] : memref<16x4x4xindex> affine.store %2, %array1[%i, %j, %k + 2] : memref<16x4x4xindex>
} {pipeline = false, unroll = false, flatten = false} } {pipeline = true, unroll = false, flatten = false}
} {pipeline = true, unroll = false, flatten = false} } {pipeline = false, unroll = false, flatten = false}
} {pipeline = false, unroll = false, flatten = false} } {pipeline = false, unroll = false, flatten = false}
return return
} }