[RemoveVarLoopBound] simplify impl logic; [QoREstimation] update to FuncOp pass, remove op-latency configuration
This commit is contained in:
parent
142ffadd14
commit
31ce83be83
|
@ -1,23 +0,0 @@
|
||||||
[200MHz]
|
|
||||||
op=1
|
|
||||||
# define INT_ADD 0.5
|
|
||||||
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
|
|
||||||
# define IMULT 7.0 //actual 7.0
|
|
||||||
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
|
|
||||||
# define IDIV 36.0 //not chain
|
|
||||||
# define U_DIV 7.0 //actual 7.0 same with imul
|
|
||||||
# define UDIV 36.0
|
|
||||||
# define FP_ADD 8.0 //not chain
|
|
||||||
# define FP_MULT 5.0 //not chain
|
|
||||||
# define FP_DIV 16.0 //not chain
|
|
||||||
# define SI_TO_FP 6.0
|
|
||||||
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
|
|
||||||
# define SHIFT 0.2
|
|
||||||
# define ALLOCA_LATENCY 1.0
|
|
||||||
# define GEP_LATENCY 1.0
|
|
||||||
# define CAST_LATENCY 0.4
|
|
||||||
# define PHI_LATENCY 1.5
|
|
||||||
# define ICMP_LATENCY 0.5
|
|
||||||
# define FCMP_LATENCY 8.0//0.5
|
|
||||||
# define SELECT_LATENCY 0.2
|
|
||||||
# define CALL_LATENCY 1.0
|
|
|
@ -1,2 +1,26 @@
|
||||||
[spec]
|
[spec]
|
||||||
frequency=200MHz
|
frequency=200MHz
|
||||||
|
|
||||||
|
[200MHz]
|
||||||
|
op=2333
|
||||||
|
# define INT_ADD 0.5
|
||||||
|
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
|
||||||
|
# define IMULT 7.0 //actual 7.0
|
||||||
|
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
|
||||||
|
# define IDIV 36.0 //not chain
|
||||||
|
# define U_DIV 7.0 //actual 7.0 same with imul
|
||||||
|
# define UDIV 36.0
|
||||||
|
# define FP_ADD 8.0 //not chain
|
||||||
|
# define FP_MULT 5.0 //not chain
|
||||||
|
# define FP_DIV 16.0 //not chain
|
||||||
|
# define SI_TO_FP 6.0
|
||||||
|
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
|
||||||
|
# define SHIFT 0.2
|
||||||
|
# define ALLOCA_LATENCY 1.0
|
||||||
|
# define GEP_LATENCY 1.0
|
||||||
|
# define CAST_LATENCY 0.4
|
||||||
|
# define PHI_LATENCY 1.5
|
||||||
|
# define ICMP_LATENCY 0.5
|
||||||
|
# define FCMP_LATENCY 8.0//0.5
|
||||||
|
# define SELECT_LATENCY 0.2
|
||||||
|
# define CALL_LATENCY 1.0
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
include "mlir/Pass/PassBase.td"
|
include "mlir/Pass/PassBase.td"
|
||||||
|
|
||||||
def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
|
def QoREstimation : Pass<"qor-estimation", "FuncOp"> {
|
||||||
let summary = "Estimate the performance and resource utilization";
|
let summary = "Estimate the performance and resource utilization";
|
||||||
let description = [{
|
let description = [{
|
||||||
This qor-estimation pass will analyze the input CDFG and pragma operations
|
This qor-estimation pass will analyze the input CDFG and pragma operations
|
||||||
|
@ -21,9 +21,6 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
|
||||||
Option<"targetSpec", "target-spec", "std::string",
|
Option<"targetSpec", "target-spec", "std::string",
|
||||||
/*default=*/"\"../config/target-spec.ini\"",
|
/*default=*/"\"../config/target-spec.ini\"",
|
||||||
"File path: target backend specifications and configurations">,
|
"File path: target backend specifications and configurations">,
|
||||||
Option<"opLatency", "op-latency", "std::string",
|
|
||||||
/*default=*/"\"../config/op-latency.ini\"",
|
|
||||||
"File path: profiling data for operation latency">
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -117,7 +117,6 @@ public:
|
||||||
|
|
||||||
void analyzeBlock(Block &block);
|
void analyzeBlock(Block &block);
|
||||||
void analyzeFunc(FuncOp func);
|
void analyzeFunc(FuncOp func);
|
||||||
void analyzeModule(ModuleOp module);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -146,22 +145,10 @@ using MemPortDict = llvm::SmallDenseMap<Operation *, MemPort, 8>;
|
||||||
// For storing MemPort indexed by the pipeline stage (a basic block).
|
// For storing MemPort indexed by the pipeline stage (a basic block).
|
||||||
using MemPortDictList = SmallVector<MemPortDict, 16>;
|
using MemPortDictList = SmallVector<MemPortDict, 16>;
|
||||||
|
|
||||||
// For storing loop induction information.
|
|
||||||
struct InductionInfo {
|
|
||||||
InductionInfo(unsigned lowerBound, unsigned upperBound, unsigned step)
|
|
||||||
: lowerBound(lowerBound), upperBound(upperBound), step(step) {}
|
|
||||||
|
|
||||||
unsigned lowerBound;
|
|
||||||
unsigned upperBound;
|
|
||||||
unsigned step;
|
|
||||||
};
|
|
||||||
using InductionInfoList = SmallVector<InductionInfo, 8>;
|
|
||||||
|
|
||||||
class HLSCppEstimator : public HLSCppVisitorBase<HLSCppEstimator, bool>,
|
class HLSCppEstimator : public HLSCppVisitorBase<HLSCppEstimator, bool>,
|
||||||
public HLSCppToolBase {
|
public HLSCppToolBase {
|
||||||
public:
|
public:
|
||||||
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath,
|
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath);
|
||||||
std::string opLatencyPath);
|
|
||||||
|
|
||||||
bool visitUnhandledOp(Operation *op) { return true; }
|
bool visitUnhandledOp(Operation *op) { return true; }
|
||||||
|
|
||||||
|
@ -184,7 +171,6 @@ public:
|
||||||
void estimateOperation(Operation *op);
|
void estimateOperation(Operation *op);
|
||||||
void estimateFunc(FuncOp func);
|
void estimateFunc(FuncOp func);
|
||||||
void estimateBlock(Block &block);
|
void estimateBlock(Block &block);
|
||||||
void estimateModule(ModuleOp module);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace scalehls
|
} // namespace scalehls
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
#include "Analysis/QoREstimation.h"
|
#include "Analysis/QoREstimation.h"
|
||||||
#include "Analysis/Passes.h"
|
#include "Analysis/Passes.h"
|
||||||
#include "Dialect/HLSCpp/HLSCpp.h"
|
#include "Dialect/HLSCpp/HLSCpp.h"
|
||||||
|
#include "mlir/Analysis/AffineAnalysis.h"
|
||||||
|
#include "mlir/Analysis/AffineStructures.h"
|
||||||
#include "mlir/IR/Operation.h"
|
#include "mlir/IR/Operation.h"
|
||||||
#include "mlir/IR/PatternMatch.h"
|
#include "mlir/IR/PatternMatch.h"
|
||||||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
||||||
|
@ -115,40 +117,23 @@ void HLSCppAnalyzer::analyzeFunc(FuncOp func) {
|
||||||
analyzeBlock(func.front());
|
analyzeBlock(func.front());
|
||||||
}
|
}
|
||||||
|
|
||||||
void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
|
|
||||||
for (auto &op : module) {
|
|
||||||
if (auto func = dyn_cast<FuncOp>(op)) {
|
|
||||||
analyzeFunc(func);
|
|
||||||
} else if (!isa<ModuleTerminatorOp>(op))
|
|
||||||
op.emitError("is unsupported operation.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// HLSCppEstimator Class Definition
|
// HLSCppEstimator Class Definition
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
/// Estimator constructor.
|
/// Estimator constructor.
|
||||||
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath,
|
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath)
|
||||||
string opLatencyPath)
|
|
||||||
: HLSCppToolBase(builder) {
|
: HLSCppToolBase(builder) {
|
||||||
|
|
||||||
/*
|
|
||||||
INIReader targetSpec(targetSpecPath);
|
INIReader targetSpec(targetSpecPath);
|
||||||
if (targetSpec.ParseError())
|
if (targetSpec.ParseError())
|
||||||
llvm::outs() << "error: target spec file parse fail, please refer to "
|
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||||
"--help option and pass in correct file path\n";
|
"--help option and pass in correct file path\n";
|
||||||
|
|
||||||
INIReader opLatency(opLatencyPath);
|
|
||||||
if (opLatency.ParseError())
|
|
||||||
llvm::outs() << "error: Op latency file parse fail, please refer to "
|
|
||||||
"--help option and pass in correct file path\n";
|
|
||||||
|
|
||||||
// TODO: Support estimator initialization from profiling data.
|
// TODO: Support estimator initialization from profiling data.
|
||||||
auto freq = targetSpec.Get("spec", "frequency", "200MHz");
|
auto freq = targetSpec.Get("spec", "frequency", "200MHz");
|
||||||
auto latency = opLatency.GetInteger(freq, "op", 0);
|
auto latency = targetSpec.GetInteger(freq, "op", 0);
|
||||||
llvm::outs() << latency << "\n";
|
llvm::outs() << latency << "\n";
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the partition index according to the affine map of a memory access
|
/// Calculate the partition index according to the affine map of a memory access
|
||||||
|
@ -529,15 +514,6 @@ void HLSCppEstimator::estimateFunc(FuncOp func) {
|
||||||
setAttrValue(func, "latency", latency);
|
setAttrValue(func, "latency", latency);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HLSCppEstimator::estimateModule(ModuleOp module) {
|
|
||||||
for (auto &op : module) {
|
|
||||||
if (auto func = dyn_cast<FuncOp>(op)) {
|
|
||||||
estimateFunc(func);
|
|
||||||
} else if (!isa<ModuleTerminatorOp>(op))
|
|
||||||
op.emitError("is unsupported operation.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Entry of scalehls-opt
|
// Entry of scalehls-opt
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -549,7 +525,7 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
||||||
|
|
||||||
// Extract all static parameters and current pragma configurations.
|
// Extract all static parameters and current pragma configurations.
|
||||||
HLSCppAnalyzer analyzer(builder);
|
HLSCppAnalyzer analyzer(builder);
|
||||||
analyzer.analyzeModule(getOperation());
|
analyzer.analyzeFunc(getOperation());
|
||||||
|
|
||||||
// Canonicalize the analyzed IR.
|
// Canonicalize the analyzed IR.
|
||||||
OwningRewritePatternList patterns;
|
OwningRewritePatternList patterns;
|
||||||
|
@ -562,8 +538,8 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
||||||
applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns));
|
applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns));
|
||||||
|
|
||||||
// Estimate performance and resource utilization.
|
// Estimate performance and resource utilization.
|
||||||
HLSCppEstimator estimator(builder, targetSpec, opLatency);
|
HLSCppEstimator estimator(builder, targetSpec);
|
||||||
estimator.estimateModule(getOperation());
|
estimator.estimateFunc(getOperation());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -41,38 +41,34 @@ void RemoveVarLoopBound::runOnOperation() {
|
||||||
// For now, the removal is only possible if the variable bound is the
|
// For now, the removal is only possible if the variable bound is the
|
||||||
// induction variable of one of the outer loops.
|
// induction variable of one of the outer loops.
|
||||||
unsigned idx = 0;
|
unsigned idx = 0;
|
||||||
for (auto inductionVar : inductionVars) {
|
if (auto valOwner = getForInductionVarOwner(val)) {
|
||||||
if (val == inductionVar) {
|
if (valOwner.hasConstantUpperBound()) {
|
||||||
if (nestedLoops[idx].hasConstantUpperBound()) {
|
// Set new constant loop bound.
|
||||||
// Set new constant loop bound.
|
auto maximum = valOwner.getConstantUpperBound();
|
||||||
auto maximum = nestedLoops[idx].getConstantUpperBound();
|
loop.setConstantUpperBound(maximum);
|
||||||
loop.setConstantUpperBound(maximum);
|
|
||||||
|
|
||||||
// Collect all components for creating AffineIf operation.
|
// Collect all components for creating AffineIf operation.
|
||||||
auto ifExpr = getAffineDimExpr(0, func.getContext()) -
|
auto ifExpr = getAffineDimExpr(0, func.getContext()) -
|
||||||
getAffineDimExpr(1, func.getContext()) -
|
getAffineDimExpr(1, func.getContext()) -
|
||||||
getAffineConstantExpr(1, func.getContext());
|
getAffineConstantExpr(1, func.getContext());
|
||||||
auto ifCondition =
|
auto ifCondition = IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
|
||||||
IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
|
|
||||||
|
|
||||||
// Create AffineIf operation at the front of the innermost
|
// Create AffineIf operation at the front of the innermost
|
||||||
// perfect loop.
|
// perfect loop.
|
||||||
builder.setInsertionPointToStart(nestedLoops.back().getBody());
|
builder.setInsertionPointToStart(nestedLoops.back().getBody());
|
||||||
auto ifOp = builder.create<mlir::AffineIfOp>(
|
auto ifOp = builder.create<mlir::AffineIfOp>(
|
||||||
func.getLoc(), ifCondition,
|
func.getLoc(), ifCondition,
|
||||||
ArrayRef<Value>({val, loop.getInductionVar()}),
|
ArrayRef<Value>({val, loop.getInductionVar()}),
|
||||||
/*withElseRegion*/ false);
|
/*withElseRegion*/ false);
|
||||||
|
|
||||||
// Move all operations in the innermost perfect loop into the
|
// Move all operations in the innermost perfect loop into the
|
||||||
// newly created AffineIf region.
|
// newly created AffineIf region.
|
||||||
auto &ifBlock = ifOp.getBody()->getOperations();
|
auto &ifBlock = ifOp.getBody()->getOperations();
|
||||||
auto &loopBlock = nestedLoops.back().getBody()->getOperations();
|
auto &loopBlock = nestedLoops.back().getBody()->getOperations();
|
||||||
ifBlock.splice(ifBlock.begin(), loopBlock,
|
ifBlock.splice(ifBlock.begin(), loopBlock,
|
||||||
std::next(loopBlock.begin()),
|
std::next(loopBlock.begin()),
|
||||||
std::prev(loopBlock.end(), 1));
|
std::prev(loopBlock.end(), 1));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
idx += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inductionVars.push_back(loop.getInductionVar());
|
inductionVars.push_back(loop.getInductionVar());
|
||||||
|
|
|
@ -10,9 +10,9 @@ func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) attribu
|
||||||
%0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex>
|
%0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex>
|
||||||
%1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex>
|
%1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex>
|
||||||
%2 = addi %0, %1 : index
|
%2 = addi %0, %1 : index
|
||||||
affine.store %2, %array1[%i, %j, %k] : memref<16x4x4xindex>
|
affine.store %2, %array1[%i, %j, %k + 2] : memref<16x4x4xindex>
|
||||||
} {pipeline = false, unroll = false, flatten = false}
|
} {pipeline = true, unroll = false, flatten = false}
|
||||||
} {pipeline = true, unroll = false, flatten = false}
|
} {pipeline = false, unroll = false, flatten = false}
|
||||||
} {pipeline = false, unroll = false, flatten = false}
|
} {pipeline = false, unroll = false, flatten = false}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue