[RemoveVarLoopBound] simplify implementation logic; [QoREstimation] convert to a FuncOp pass and remove the op-latency configuration

This commit is contained in:
Hanchen Ye 2020-12-13 22:07:04 -06:00
parent 142ffadd14
commit 31ce83be83
7 changed files with 60 additions and 104 deletions

View File

@ -1,23 +0,0 @@
[200MHz]
op=1
# define INT_ADD 0.5
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul, etc., so this value estimates the effective latency.
# define IMULT 7.0 //actual 7.0
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
# define IDIV 36.0 //not chain
# define U_DIV 7.0 //actual 7.0 same with imul
# define UDIV 36.0
# define FP_ADD 8.0 //not chain
# define FP_MULT 5.0 //not chain
# define FP_DIV 16.0 //not chain
# define SI_TO_FP 6.0
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
# define SHIFT 0.2
# define ALLOCA_LATENCY 1.0
# define GEP_LATENCY 1.0
# define CAST_LATENCY 0.4
# define PHI_LATENCY 1.5
# define ICMP_LATENCY 0.5
# define FCMP_LATENCY 8.0//0.5
# define SELECT_LATENCY 0.2
# define CALL_LATENCY 1.0

View File

@ -1,2 +1,26 @@
[spec]
frequency=200MHz
[200MHz]
op=2333
# define INT_ADD 0.5
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul, etc., so this value estimates the effective latency.
# define IMULT 7.0 //actual 7.0
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
# define IDIV 36.0 //not chain
# define U_DIV 7.0 //actual 7.0 same with imul
# define UDIV 36.0
# define FP_ADD 8.0 //not chain
# define FP_MULT 5.0 //not chain
# define FP_DIV 16.0 //not chain
# define SI_TO_FP 6.0
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
# define SHIFT 0.2
# define ALLOCA_LATENCY 1.0
# define GEP_LATENCY 1.0
# define CAST_LATENCY 0.4
# define PHI_LATENCY 1.5
# define ICMP_LATENCY 0.5
# define FCMP_LATENCY 8.0//0.5
# define SELECT_LATENCY 0.2
# define CALL_LATENCY 1.0

View File

@ -7,7 +7,7 @@
include "mlir/Pass/PassBase.td"
def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
def QoREstimation : Pass<"qor-estimation", "FuncOp"> {
let summary = "Estimate the performance and resource utilization";
let description = [{
This qor-estimation pass will analyze the input CDFG and pragma operations
@ -21,9 +21,6 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
Option<"targetSpec", "target-spec", "std::string",
/*default=*/"\"../config/target-spec.ini\"",
"File path: target backend specifications and configurations">,
Option<"opLatency", "op-latency", "std::string",
/*default=*/"\"../config/op-latency.ini\"",
"File path: profiling data for operation latency">
];
}

View File

@ -117,7 +117,6 @@ public:
void analyzeBlock(Block &block);
void analyzeFunc(FuncOp func);
void analyzeModule(ModuleOp module);
};
//===----------------------------------------------------------------------===//
@ -146,22 +145,10 @@ using MemPortDict = llvm::SmallDenseMap<Operation *, MemPort, 8>;
// For storing MemPort indexed by the pipeline stage (a basic block).
using MemPortDictList = SmallVector<MemPortDict, 16>;
/// Record of loop induction information: the lower bound, the upper bound, and
/// the step, each kept as an unsigned integer.
struct InductionInfo {
  unsigned lowerBound;
  unsigned upperBound;
  unsigned step;

  /// Build a record from the three induction parameters; every argument is
  /// copied unchanged into the member that shares its name.
  InductionInfo(unsigned lowerBound, unsigned upperBound, unsigned step)
      : lowerBound{lowerBound}, upperBound{upperBound}, step{step} {}
};
using InductionInfoList = SmallVector<InductionInfo, 8>;
class HLSCppEstimator : public HLSCppVisitorBase<HLSCppEstimator, bool>,
public HLSCppToolBase {
public:
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath,
std::string opLatencyPath);
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath);
bool visitUnhandledOp(Operation *op) { return true; }
@ -184,7 +171,6 @@ public:
void estimateOperation(Operation *op);
void estimateFunc(FuncOp func);
void estimateBlock(Block &block);
void estimateModule(ModuleOp module);
};
} // namespace scalehls

View File

@ -5,6 +5,8 @@
#include "Analysis/QoREstimation.h"
#include "Analysis/Passes.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@ -115,40 +117,23 @@ void HLSCppAnalyzer::analyzeFunc(FuncOp func) {
analyzeBlock(func.front());
}
/// Analyze every function found directly inside `module`.
///
/// Each FuncOp is forwarded to analyzeFunc(). Any other operation, apart from
/// the module terminator, has an error emitted on it; iteration then continues
/// with the remaining operations.
void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
  for (auto &topLevelOp : module) {
    auto funcOp = dyn_cast<FuncOp>(topLevelOp);
    if (funcOp) {
      analyzeFunc(funcOp);
      continue;
    }

    // The terminator is the only non-function operation tolerated here.
    if (isa<ModuleTerminatorOp>(topLevelOp))
      continue;

    topLevelOp.emitError("is unsupported operation.");
  }
}
//===----------------------------------------------------------------------===//
// HLSCppEstimator Class Definition
//===----------------------------------------------------------------------===//
/// Estimator constructor.
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath,
string opLatencyPath)
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath)
: HLSCppToolBase(builder) {
/*
INIReader targetSpec(targetSpecPath);
if (targetSpec.ParseError())
llvm::outs() << "error: target spec file parse fail, please refer to "
"--help option and pass in correct file path\n";
INIReader opLatency(opLatencyPath);
if (opLatency.ParseError())
llvm::outs() << "error: Op latency file parse fail, please refer to "
"--help option and pass in correct file path\n";
// TODO: Support estimator initiation from profiling data.
auto freq = targetSpec.Get("spec", "frequency", "200MHz");
auto latency = opLatency.GetInteger(freq, "op", 0);
auto latency = targetSpec.GetInteger(freq, "op", 0);
llvm::outs() << latency << "\n";
*/
}
/// Calculate the partition index according to the affine map of a memory access
@ -529,15 +514,6 @@ void HLSCppEstimator::estimateFunc(FuncOp func) {
setAttrValue(func, "latency", latency);
}
/// Estimate every function found directly inside `module`.
///
/// Each FuncOp is forwarded to estimateFunc(). Any other operation, apart from
/// the module terminator, has an error emitted on it; iteration then continues
/// with the remaining operations.
void HLSCppEstimator::estimateModule(ModuleOp module) {
  for (auto &topLevelOp : module) {
    auto funcOp = dyn_cast<FuncOp>(topLevelOp);
    if (funcOp) {
      estimateFunc(funcOp);
      continue;
    }

    // The terminator is the only non-function operation tolerated here.
    if (isa<ModuleTerminatorOp>(topLevelOp))
      continue;

    topLevelOp.emitError("is unsupported operation.");
  }
}
//===----------------------------------------------------------------------===//
// Entry of scalehls-opt
//===----------------------------------------------------------------------===//
@ -549,7 +525,7 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
// Extract all static parameters and current pragma configurations.
HLSCppAnalyzer analyzer(builder);
analyzer.analyzeModule(getOperation());
analyzer.analyzeFunc(getOperation());
// Canonicalize the analyzed IR.
OwningRewritePatternList patterns;
@ -562,8 +538,8 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns));
// Estimate performance and resource utilization.
HLSCppEstimator estimator(builder, targetSpec, opLatency);
estimator.estimateModule(getOperation());
HLSCppEstimator estimator(builder, targetSpec);
estimator.estimateFunc(getOperation());
}
};
} // namespace

View File

@ -41,38 +41,34 @@ void RemoveVarLoopBound::runOnOperation() {
// For now, only if the variable bound is the induction variable of
// one of the outer loops, the removal is possible.
unsigned idx = 0;
for (auto inductionVar : inductionVars) {
if (val == inductionVar) {
if (nestedLoops[idx].hasConstantUpperBound()) {
// Set new constant loop bound.
auto maximum = nestedLoops[idx].getConstantUpperBound();
loop.setConstantUpperBound(maximum);
if (auto valOwner = getForInductionVarOwner(val)) {
if (valOwner.hasConstantUpperBound()) {
// Set new constant loop bound.
auto maximum = valOwner.getConstantUpperBound();
loop.setConstantUpperBound(maximum);
// Collect all components for creating AffineIf operation.
auto ifExpr = getAffineDimExpr(0, func.getContext()) -
getAffineDimExpr(1, func.getContext()) -
getAffineConstantExpr(1, func.getContext());
auto ifCondition =
IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
// Collect all components for creating AffineIf operation.
auto ifExpr = getAffineDimExpr(0, func.getContext()) -
getAffineDimExpr(1, func.getContext()) -
getAffineConstantExpr(1, func.getContext());
auto ifCondition = IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
// Create AffineIf operation in the front of the innermost
// perfect loop.
builder.setInsertionPointToStart(nestedLoops.back().getBody());
auto ifOp = builder.create<mlir::AffineIfOp>(
func.getLoc(), ifCondition,
ArrayRef<Value>({val, loop.getInductionVar()}),
/*withElseRegion*/ false);
// Create AffineIf operation in the front of the innermost
// perfect loop.
builder.setInsertionPointToStart(nestedLoops.back().getBody());
auto ifOp = builder.create<mlir::AffineIfOp>(
func.getLoc(), ifCondition,
ArrayRef<Value>({val, loop.getInductionVar()}),
/*withElseRegion*/ false);
// Move all operations in the innermost perfect loop into the
// new created AffineIf region.
auto &ifBlock = ifOp.getBody()->getOperations();
auto &loopBlock = nestedLoops.back().getBody()->getOperations();
ifBlock.splice(ifBlock.begin(), loopBlock,
std::next(loopBlock.begin()),
std::prev(loopBlock.end(), 1));
}
// Move all operations in the innermost perfect loop into the
// new created AffineIf region.
auto &ifBlock = ifOp.getBody()->getOperations();
auto &loopBlock = nestedLoops.back().getBody()->getOperations();
ifBlock.splice(ifBlock.begin(), loopBlock,
std::next(loopBlock.begin()),
std::prev(loopBlock.end(), 1));
}
idx += 1;
}
}
inductionVars.push_back(loop.getInductionVar());

View File

@ -10,9 +10,9 @@ func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) attribu
%0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex>
%1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex>
%2 = addi %0, %1 : index
affine.store %2, %array1[%i, %j, %k] : memref<16x4x4xindex>
} {pipeline = false, unroll = false, flatten = false}
} {pipeline = true, unroll = false, flatten = false}
affine.store %2, %array1[%i, %j, %k + 2] : memref<16x4x4xindex>
} {pipeline = true, unroll = false, flatten = false}
} {pipeline = false, unroll = false, flatten = false}
} {pipeline = false, unroll = false, flatten = false}
return
}