[RemoveVarLoopBound] simplify impl logic; [QoREstimation] update to FuncOp pass, remove op-latency configuration
This commit is contained in:
parent
142ffadd14
commit
31ce83be83
|
@ -1,23 +0,0 @@
|
|||
[200MHz]
|
||||
op=1
|
||||
# define INT_ADD 0.5
|
||||
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
|
||||
# define IMULT 7.0 //actual 7.0
|
||||
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
|
||||
# define IDIV 36.0 //not chain
|
||||
# define U_DIV 7.0 //actual 7.0 same with imul
|
||||
# define UDIV 36.0
|
||||
# define FP_ADD 8.0 //not chain
|
||||
# define FP_MULT 5.0 //not chain
|
||||
# define FP_DIV 16.0 //not chain
|
||||
# define SI_TO_FP 6.0
|
||||
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
|
||||
# define SHIFT 0.2
|
||||
# define ALLOCA_LATENCY 1.0
|
||||
# define GEP_LATENCY 1.0
|
||||
# define CAST_LATENCY 0.4
|
||||
# define PHI_LATENCY 1.5
|
||||
# define ICMP_LATENCY 0.5
|
||||
# define FCMP_LATENCY 8.0//0.5
|
||||
# define SELECT_LATENCY 0.2
|
||||
# define CALL_LATENCY 1.0
|
|
@ -1,2 +1,26 @@
|
|||
[spec]
|
||||
frequency=200MHz
|
||||
|
||||
[200MHz]
|
||||
op=2333
|
||||
# define INT_ADD 0.5
|
||||
# define INT_MULT 5.0 //actual 5.0 //load and write can be chained with mul,etc. Therefore estimate the effective latency.
|
||||
# define IMULT 7.0 //actual 7.0
|
||||
# define INT_DIV 8.0 //actual 8.0 //div can chain with load, cannot chain with other operations.
|
||||
# define IDIV 36.0 //not chain
|
||||
# define U_DIV 7.0 //actual 7.0 same with imul
|
||||
# define UDIV 36.0
|
||||
# define FP_ADD 8.0 //not chain
|
||||
# define FP_MULT 5.0 //not chain
|
||||
# define FP_DIV 16.0 //not chain
|
||||
# define SI_TO_FP 6.0
|
||||
# define FP_TO_SI 2.5 //after casting, there is a select for div, so add 0.5.
|
||||
# define SHIFT 0.2
|
||||
# define ALLOCA_LATENCY 1.0
|
||||
# define GEP_LATENCY 1.0
|
||||
# define CAST_LATENCY 0.4
|
||||
# define PHI_LATENCY 1.5
|
||||
# define ICMP_LATENCY 0.5
|
||||
# define FCMP_LATENCY 8.0//0.5
|
||||
# define SELECT_LATENCY 0.2
|
||||
# define CALL_LATENCY 1.0
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
include "mlir/Pass/PassBase.td"
|
||||
|
||||
def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
|
||||
def QoREstimation : Pass<"qor-estimation", "FuncOp"> {
|
||||
let summary = "Estimate the performance and resource utilization";
|
||||
let description = [{
|
||||
This qor-estimation pass will analyze the input CDFG and pragma operations
|
||||
|
@ -21,9 +21,6 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
|
|||
Option<"targetSpec", "target-spec", "std::string",
|
||||
/*default=*/"\"../config/target-spec.ini\"",
|
||||
"File path: target backend specifications and configurations">,
|
||||
Option<"opLatency", "op-latency", "std::string",
|
||||
/*default=*/"\"../config/op-latency.ini\"",
|
||||
"File path: profiling data for operation latency">
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
@ -117,7 +117,6 @@ public:
|
|||
|
||||
void analyzeBlock(Block &block);
|
||||
void analyzeFunc(FuncOp func);
|
||||
void analyzeModule(ModuleOp module);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -146,22 +145,10 @@ using MemPortDict = llvm::SmallDenseMap<Operation *, MemPort, 8>;
|
|||
// For storing MemPort indexed by the pipeline stage (a basic block).
|
||||
using MemPortDictList = SmallVector<MemPortDict, 16>;
|
||||
|
||||
// For storing loop induction information.
|
||||
// Plain value holder for one loop's induction information: three unsigned
// fields (lowerBound, upperBound, step) that the constructor copies verbatim
// from its arguments. No validation or normalization is performed here.
struct InductionInfo {
|
||||
InductionInfo(unsigned lowerBound, unsigned upperBound, unsigned step)
|
||||
: lowerBound(lowerBound), upperBound(upperBound), step(step) {}
|
||||

||||
unsigned lowerBound;
|
||||
unsigned upperBound;
|
||||
unsigned step;
|
||||
};
|
||||
using InductionInfoList = SmallVector<InductionInfo, 8>;
|
||||
|
||||
class HLSCppEstimator : public HLSCppVisitorBase<HLSCppEstimator, bool>,
|
||||
public HLSCppToolBase {
|
||||
public:
|
||||
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath,
|
||||
std::string opLatencyPath);
|
||||
explicit HLSCppEstimator(OpBuilder &builder, std::string targetSpecPath);
|
||||
|
||||
// Always returns true, regardless of which operation is passed in.
// NOTE(review): presumably the fallback for ops that have no dedicated
// visitor -- confirm against HLSCppVisitorBase.
bool visitUnhandledOp(Operation *op) { return true; }
|
||||
|
||||
|
@ -184,7 +171,6 @@ public:
|
|||
void estimateOperation(Operation *op);
|
||||
void estimateFunc(FuncOp func);
|
||||
void estimateBlock(Block &block);
|
||||
void estimateModule(ModuleOp module);
|
||||
};
|
||||
|
||||
} // namespace scalehls
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include "Analysis/QoREstimation.h"
|
||||
#include "Analysis/Passes.h"
|
||||
#include "Dialect/HLSCpp/HLSCpp.h"
|
||||
#include "mlir/Analysis/AffineAnalysis.h"
|
||||
#include "mlir/Analysis/AffineStructures.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/IR/PatternMatch.h"
|
||||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
||||
|
@ -115,40 +117,23 @@ void HLSCppAnalyzer::analyzeFunc(FuncOp func) {
|
|||
analyzeBlock(func.front());
|
||||
}
|
||||
|
||||
/// Walk every operation directly contained in `module`. Each FuncOp is handed
/// to analyzeFunc(); any other operation that is not the ModuleTerminatorOp
/// gets an "is unsupported operation." error emitted on it. The loop keeps
/// going after an error and the function returns nothing.
void HLSCppAnalyzer::analyzeModule(ModuleOp module) {
|
||||
for (auto &op : module) {
|
||||
if (auto func = dyn_cast<FuncOp>(op)) {
|
||||
analyzeFunc(func);
|
||||
} else if (!isa<ModuleTerminatorOp>(op))
|
||||
op.emitError("is unsupported operation.");
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// HLSCppEstimator Class Definition
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Estimator constructor.
|
||||
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath,
|
||||
string opLatencyPath)
|
||||
HLSCppEstimator::HLSCppEstimator(OpBuilder &builder, string targetSpecPath)
|
||||
: HLSCppToolBase(builder) {
|
||||
|
||||
/*
|
||||
INIReader targetSpec(targetSpecPath);
|
||||
if (targetSpec.ParseError())
|
||||
llvm::outs() << "error: target spec file parse fail, please refer to "
|
||||
"--help option and pass in correct file path\n";
|
||||
|
||||
INIReader opLatency(opLatencyPath);
|
||||
if (opLatency.ParseError())
|
||||
llvm::outs() << "error: Op latency file parse fail, please refer to "
|
||||
"--help option and pass in correct file path\n";
|
||||
|
||||
// TODO: Support estimator initiation from profiling data.
|
||||
auto freq = targetSpec.Get("spec", "frequency", "200MHz");
|
||||
auto latency = opLatency.GetInteger(freq, "op", 0);
|
||||
auto latency = targetSpec.GetInteger(freq, "op", 0);
|
||||
llvm::outs() << latency << "\n";
|
||||
*/
|
||||
}
|
||||
|
||||
/// Calculate the partition index according to the affine map of a memory access
|
||||
|
@ -529,15 +514,6 @@ void HLSCppEstimator::estimateFunc(FuncOp func) {
|
|||
setAttrValue(func, "latency", latency);
|
||||
}
|
||||
|
||||
/// Walk every operation directly contained in `module`. Each FuncOp is handed
/// to estimateFunc(); any other operation that is not the ModuleTerminatorOp
/// gets an "is unsupported operation." error emitted on it. The loop keeps
/// going after an error and the function returns nothing.
void HLSCppEstimator::estimateModule(ModuleOp module) {
|
||||
for (auto &op : module) {
|
||||
if (auto func = dyn_cast<FuncOp>(op)) {
|
||||
estimateFunc(func);
|
||||
} else if (!isa<ModuleTerminatorOp>(op))
|
||||
op.emitError("is unsupported operation.");
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Entry of scalehls-opt
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -549,7 +525,7 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
|||
|
||||
// Extract all static parameters and current pragma configurations.
|
||||
HLSCppAnalyzer analyzer(builder);
|
||||
analyzer.analyzeModule(getOperation());
|
||||
analyzer.analyzeFunc(getOperation());
|
||||
|
||||
// Canonicalize the analyzed IR.
|
||||
OwningRewritePatternList patterns;
|
||||
|
@ -562,8 +538,8 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
|
|||
applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns));
|
||||
|
||||
// Estimate performance and resource utilization.
|
||||
HLSCppEstimator estimator(builder, targetSpec, opLatency);
|
||||
estimator.estimateModule(getOperation());
|
||||
HLSCppEstimator estimator(builder, targetSpec);
|
||||
estimator.estimateFunc(getOperation());
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
|
|
@ -41,38 +41,34 @@ void RemoveVarLoopBound::runOnOperation() {
|
|||
// For now, the removal is only possible if the variable bound is the
|
||||
// induction variable of one of the outer loops.
|
||||
unsigned idx = 0;
|
||||
for (auto inductionVar : inductionVars) {
|
||||
if (val == inductionVar) {
|
||||
if (nestedLoops[idx].hasConstantUpperBound()) {
|
||||
// Set new constant loop bound.
|
||||
auto maximum = nestedLoops[idx].getConstantUpperBound();
|
||||
loop.setConstantUpperBound(maximum);
|
||||
if (auto valOwner = getForInductionVarOwner(val)) {
|
||||
if (valOwner.hasConstantUpperBound()) {
|
||||
// Set new constant loop bound.
|
||||
auto maximum = valOwner.getConstantUpperBound();
|
||||
loop.setConstantUpperBound(maximum);
|
||||
|
||||
// Collect all components for creating AffineIf operation.
|
||||
auto ifExpr = getAffineDimExpr(0, func.getContext()) -
|
||||
getAffineDimExpr(1, func.getContext()) -
|
||||
getAffineConstantExpr(1, func.getContext());
|
||||
auto ifCondition =
|
||||
IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
|
||||
// Collect all components for creating AffineIf operation.
|
||||
auto ifExpr = getAffineDimExpr(0, func.getContext()) -
|
||||
getAffineDimExpr(1, func.getContext()) -
|
||||
getAffineConstantExpr(1, func.getContext());
|
||||
auto ifCondition = IntegerSet::get(2, 0, ifExpr, /*eqFlags=*/false);
|
||||
|
||||
// Create an AffineIf operation at the front of the innermost
|
||||
// perfect loop.
|
||||
builder.setInsertionPointToStart(nestedLoops.back().getBody());
|
||||
auto ifOp = builder.create<mlir::AffineIfOp>(
|
||||
func.getLoc(), ifCondition,
|
||||
ArrayRef<Value>({val, loop.getInductionVar()}),
|
||||
/*withElseRegion*/ false);
|
||||
// Create an AffineIf operation at the front of the innermost
|
||||
// perfect loop.
|
||||
builder.setInsertionPointToStart(nestedLoops.back().getBody());
|
||||
auto ifOp = builder.create<mlir::AffineIfOp>(
|
||||
func.getLoc(), ifCondition,
|
||||
ArrayRef<Value>({val, loop.getInductionVar()}),
|
||||
/*withElseRegion*/ false);
|
||||
|
||||
// Move all operations in the innermost perfect loop into the
|
||||
// newly created AffineIf region.
|
||||
auto &ifBlock = ifOp.getBody()->getOperations();
|
||||
auto &loopBlock = nestedLoops.back().getBody()->getOperations();
|
||||
ifBlock.splice(ifBlock.begin(), loopBlock,
|
||||
std::next(loopBlock.begin()),
|
||||
std::prev(loopBlock.end(), 1));
|
||||
}
|
||||
// Move all operations in the innermost perfect loop into the
|
||||
// newly created AffineIf region.
|
||||
auto &ifBlock = ifOp.getBody()->getOperations();
|
||||
auto &loopBlock = nestedLoops.back().getBody()->getOperations();
|
||||
ifBlock.splice(ifBlock.begin(), loopBlock,
|
||||
std::next(loopBlock.begin()),
|
||||
std::prev(loopBlock.end(), 1));
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
}
|
||||
inductionVars.push_back(loop.getInductionVar());
|
||||
|
|
|
@ -10,9 +10,9 @@ func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) attribu
|
|||
%0 = affine.load %array0[%i, %j, %k] : memref<16x4x4xindex>
|
||||
%1 = affine.load %array1[%i, %j, %k] : memref<16x4x4xindex>
|
||||
%2 = addi %0, %1 : index
|
||||
affine.store %2, %array1[%i, %j, %k] : memref<16x4x4xindex>
|
||||
} {pipeline = false, unroll = false, flatten = false}
|
||||
} {pipeline = true, unroll = false, flatten = false}
|
||||
affine.store %2, %array1[%i, %j, %k + 2] : memref<16x4x4xindex>
|
||||
} {pipeline = true, unroll = false, flatten = false}
|
||||
} {pipeline = false, unroll = false, flatten = false}
|
||||
} {pipeline = false, unroll = false, flatten = false}
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue