[QoREstimation] factor out QoREstimation.h (#20); [MultipleLevelDSE] start of auto dse

Hanchen Ye 2021-01-08 02:20:11 -06:00
parent 9f31dd663d
commit ba3ca07833
10 changed files with 240 additions and 206 deletions


@ -0,0 +1,124 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#ifndef SCALEHLS_ANALYSIS_QORESTIMATION_H
#define SCALEHLS_ANALYSIS_QORESTIMATION_H
#include "Analysis/Utils.h"
#include "Dialect/HLSCpp/Visitor.h"
#include "INIReader.h"
namespace mlir {
namespace scalehls {
using LatencyMap = llvm::StringMap<int64_t>;
void getLatencyMap(INIReader spec, LatencyMap &latencyMap);
class HLSCppEstimator
: public HLSCppVisitorBase<HLSCppEstimator, bool, int64_t>,
public HLSCppAnalysisBase {
public:
explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
: HLSCppAnalysisBase(OpBuilder(func)), func(func),
latencyMap(latencyMap) {
getFuncDependencies();
}
// For storing all dependencies indexed by the dependency source operation.
using Depends = SmallVector<Operation *, 16>;
using DependsMap = DenseMap<Operation *, Depends>;
// Indicate the unoccupied memory ports number.
struct PortInfo {
unsigned rdPort;
unsigned wrPort;
unsigned rdwrPort;
PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
: rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
};
// For storing ports number of all partitions indexed by the memref.
using Ports = SmallVector<PortInfo, 16>;
using PortsMap = DenseMap<Value, Ports>;
// For storing PortsMap indexed by the scheduling level.
using PortsMapDict = DenseMap<int64_t, PortsMap>;
// For storing the DSP resource utilization indexed by the schedule level.
using ResourceMap = DenseMap<int64_t, int64_t>;
/// Collect all dependencies detected in the function.
void getFuncDependencies();
void setScheduleValue(Operation *op, int64_t begin, int64_t end) {
setAttrValue(op, "schedule_begin", begin);
setAttrValue(op, "schedule_end", end);
}
using HLSCppVisitorBase::visitOp;
bool visitUnhandledOp(Operation *op, int64_t begin) {
// Default latency of any unhandled operation is 0.
setScheduleValue(op, begin, begin);
return true;
}
/// LoadOp and StoreOp related methods.
int64_t getPartitionIndex(Operation *op);
void estimateLoadStore(Operation *op, int64_t begin);
bool visitOp(AffineLoadOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(AffineStoreOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(LoadOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 2);
return true;
}
bool visitOp(StoreOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 1);
return true;
}
/// AffineForOp related methods.
// unsigned getOpMinII(AffineForOp forOp);
int64_t getResMinII(MemAccessesMap &map);
int64_t getDepMinII(AffineForOp forOp, MemAccessesMap &map);
bool visitOp(AffineForOp op, int64_t begin);
/// Other operation handlers.
bool visitOp(AffineIfOp op, int64_t begin);
bool visitOp(CallOp op, int64_t begin);
/// Handle operations with profiled latency.
#define HANDLE(OPTYPE, KEYNAME) \
bool visitOp(OPTYPE op, int64_t begin) { \
setScheduleValue(op, begin, begin + latencyMap[KEYNAME] + 1); \
return true; \
}
HANDLE(AddFOp, "fadd");
HANDLE(MulFOp, "fmul");
HANDLE(DivFOp, "fdiv");
HANDLE(CmpFOp, "fcmp");
#undef HANDLE
/// Block scheduler and estimator.
int64_t getResourceMap(Block &block, ResourceMap &addFMap,
ResourceMap &mulFMap);
int64_t estimateResource(Block &block);
Optional<std::pair<int64_t, int64_t>> estimateBlock(Block &block,
int64_t begin);
void reverseSchedule();
void estimateFunc();
FuncOp &func;
DependsMap dependsMap;
PortsMapDict portsMapDict;
LatencyMap &latencyMap;
};
} // namespace scalehls
} // namespace mlir
#endif // SCALEHLS_ANALYSIS_QORESTIMATION_H
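
Taken together, the factored-out header exposes everything a pass needs to score a function: getLatencyMap fills the per-operation latencies from a target spec, and HLSCppEstimator walks the function and annotates schedule_begin/schedule_end attributes. A rough sketch of driving it, mirroring the QoREstimation pass body shown later in this commit (the helper name and the explicit estimateFunc() call are assumptions based on the declared interface, not code from the commit):

#include "Analysis/QoREstimation.h"
#include "INIReader.h"

using namespace mlir;
using namespace scalehls;

// Hypothetical helper: estimate every function in a module against a spec file.
static void estimateModule(ModuleOp module, StringRef specPath) {
  INIReader spec(specPath.str());
  if (spec.ParseError())
    return; // Bail out on a bad spec file; the real pass emits a diagnostic.

  // Profiled per-operation latencies, keyed as "fadd", "fmul", "fdiv", "fcmp".
  LatencyMap latencyMap;
  getLatencyMap(spec, latencyMap);

  // The estimator annotates each visited operation with schedule_begin and
  // schedule_end attributes while scheduling the function.
  for (auto func : module.getOps<FuncOp>())
    HLSCppEstimator(func, latencyMap).estimateFunc();
}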


@ -71,9 +71,6 @@ using MemAccessesMap = DenseMap<Value, MemAccesses>;
 void getMemAccessesMap(Block &block, MemAccessesMap &map,
                        bool includeCalls = false);

-Optional<std::pair<int64_t, int64_t>>
-getBoundOfAffineBound(AffineBound bound, MLIRContext *context);

 // Check if the lhsOp and rhsOp is at the same scheduling level. In this check,
 // AffineIfOp is transparent.
 Optional<std::pair<Operation *, Operation *>> checkSameLevel(Operation *lhsOp,


@ -44,16 +44,16 @@ bool applySimplifyMemrefAccess(FuncOp func);
 /// Pragma optimization passes.
 std::unique_ptr<Pass> createLoopPipeliningPass();
 std::unique_ptr<Pass> createArrayPartitionPass();
-std::unique_ptr<Pass> createPragmaDSEPass();
+std::unique_ptr<Pass> createMultipleLevelDSEPass();

 /// Loop optimization passes.
 std::unique_ptr<Pass> createAffineLoopPerfectionPass();
-std::unique_ptr<Pass> createPartialAffineLoopTilePass();
 std::unique_ptr<Pass> createRemoveVariableBoundPass();
+std::unique_ptr<Pass> createPartialAffineLoopTilePass();

 /// Dataflow optimization passes.
-std::unique_ptr<Pass> createSplitFunctionPass();
 std::unique_ptr<Pass> createLegalizeDataflowPass();
+std::unique_ptr<Pass> createSplitFunctionPass();

 /// Bufferization passes.
 std::unique_ptr<Pass> createHLSKernelBufferizePass();


@ -37,17 +37,23 @@ def LoopPipelining : Pass<"loop-pipelining", "FuncOp"> {
   ];
 }

-def PragmaDSE : Pass<"pragma-dse", "ModuleOp"> {
-  let summary = "Optimize pragma configurations";
+def MultipleLevelDSE : Pass<"multiple-level-dse", "ModuleOp"> {
+  let summary = "Optimize HLS design at multiple abstraction level";
   let description = [{
-    This pragma-dse pass will automatically tune HLS pragma insertion and
-    configuration for performance and area optimization. By calling methods
+    This multiple-level-dse pass will automatically conduct the design space
+    exploration (DSE) across multiple abstraction levels. By calling methods
     provided by qor-estimation, this pass is able to rapidly obtain the QoR
-    estimation of the current design point, and feed it back to the design space
-    exploration engine for an efficient convergence.
+    estimation of the current design point, and feed it back to the DSE engine
+    for an efficient optimization convergence.
   }];
-  let constructor = "mlir::scalehls::createPragmaDSEPass()";
+  let constructor = "mlir::scalehls::createMultipleLevelDSEPass()";
+
+  let options = [
+    Option<"targetSpec", "target-spec", "std::string",
+           /*default=*/"\"../config/target-spec.ini\"",
+           "File path: target backend specifications and configurations">
+  ];
 }

 //===----------------------------------------------------------------------===//
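
For illustration only, assuming the usual MLIR syntax for passing pass options through scalehls-opt (the input file name here is hypothetical), the new target-spec option would be supplied as:

scalehls-opt -multiple-level-dse="target-spec=../config/target-spec.ini" input.mlir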


@ -2,10 +2,8 @@
 //
 //===----------------------------------------------------------------------===//

+#include "Analysis/QoREstimation.h"
 #include "Analysis/Passes.h"
-#include "Analysis/Utils.h"
-#include "Dialect/HLSCpp/Visitor.h"
-#include "INIReader.h"
 #include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/LoopAnalysis.h"
@ -18,117 +16,10 @@ using namespace mlir;
 using namespace scalehls;
 using namespace hlscpp;

-using LatencyMap = llvm::StringMap<int64_t>;

 //===----------------------------------------------------------------------===//
-// HLSCppEstimator Class
+// Initialization Methods
 //===----------------------------------------------------------------------===//
namespace {
class HLSCppEstimator
: public HLSCppVisitorBase<HLSCppEstimator, bool, int64_t>,
public HLSCppAnalysisBase {
public:
explicit HLSCppEstimator(FuncOp &func, LatencyMap &latencyMap)
: HLSCppAnalysisBase(OpBuilder(func)), func(func),
latencyMap(latencyMap) {
getFuncDependencies();
}
// For storing all dependencies indexed by the dependency source operation.
using Depends = SmallVector<Operation *, 16>;
using DependsMap = DenseMap<Operation *, Depends>;
// Indicate the unoccupied memory ports number.
struct PortInfo {
unsigned rdPort;
unsigned wrPort;
unsigned rdwrPort;
PortInfo(unsigned rdPort = 0, unsigned wrPort = 0, unsigned rdwrPort = 0)
: rdPort(rdPort), wrPort(wrPort), rdwrPort(rdwrPort) {}
};
// For storing ports number of all partitions indexed by the memref.
using Ports = SmallVector<PortInfo, 16>;
using PortsMap = DenseMap<Value, Ports>;
// For storing PortsMap indexed by the scheduling level.
using PortsMapDict = DenseMap<int64_t, PortsMap>;
// For storing the DSP resource utilization indexed by the schedule level.
using ResourceMap = DenseMap<int64_t, int64_t>;
/// Collect all dependencies detected in the function.
void getFuncDependencies();
void setScheduleValue(Operation *op, int64_t begin, int64_t end) {
setAttrValue(op, "schedule_begin", begin);
setAttrValue(op, "schedule_end", end);
}
using HLSCppVisitorBase::visitOp;
bool visitUnhandledOp(Operation *op, int64_t begin) {
// Default latency of any unhandled operation is 0.
setScheduleValue(op, begin, begin);
return true;
}
/// LoadOp and StoreOp related methods.
int64_t getPartitionIndex(Operation *op);
void estimateLoadStore(Operation *op, int64_t begin);
bool visitOp(AffineLoadOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(AffineStoreOp op, int64_t begin) {
return estimateLoadStore(op, begin), true;
}
bool visitOp(LoadOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 2);
return true;
}
bool visitOp(StoreOp op, int64_t begin) {
setScheduleValue(op, begin, begin + 1);
return true;
}
/// AffineForOp related methods.
// unsigned getOpMinII(AffineForOp forOp);
int64_t getResMinII(MemAccessesMap &map);
int64_t getDepMinII(AffineForOp forOp, MemAccessesMap &map);
bool visitOp(AffineForOp op, int64_t begin);
/// Other operation handlers.
bool visitOp(AffineIfOp op, int64_t begin);
bool visitOp(CallOp op, int64_t begin);
/// Handle operations with profiled latency.
#define HANDLE(OPTYPE, KEYNAME) \
bool visitOp(OPTYPE op, int64_t begin) { \
setScheduleValue(op, begin, begin + latencyMap[KEYNAME] + 1); \
return true; \
}
HANDLE(AddFOp, "fadd");
HANDLE(MulFOp, "fmul");
HANDLE(DivFOp, "fdiv");
HANDLE(CmpFOp, "fcmp");
#undef HANDLE
/// Block scheduler and estimator.
int64_t getResourceMap(Block &block, ResourceMap &addFMap,
ResourceMap &mulFMap);
int64_t estimateResource(Block &block);
Optional<std::pair<int64_t, int64_t>> estimateBlock(Block &block,
int64_t begin);
void reverseSchedule();
void estimateFunc();
FuncOp &func;
DependsMap dependsMap;
PortsMapDict portsMapDict;
LatencyMap &latencyMap;
};
} // namespace
 /// Collect all dependencies detected in the function.
 void HLSCppEstimator::getFuncDependencies() {
   MemAccessesMap map;
@ -812,8 +703,9 @@ void HLSCppEstimator::estimateFunc() {
 // Entry of scalehls-opt
 //===----------------------------------------------------------------------===//

-static void getLatencyMap(INIReader &spec, std::string freq,
-                          LatencyMap &latencyMap) {
+void scalehls::getLatencyMap(INIReader spec, LatencyMap &latencyMap) {
+  auto freq = spec.Get("specification", "frequency", "100MHz");
+
   latencyMap["fadd"] = spec.GetInteger(freq, "fadd", 4);
   latencyMap["fmul"] = spec.GetInteger(freq, "fmul", 3);
   latencyMap["fdiv"] = spec.GetInteger(freq, "fdiv", 15);
@ -826,14 +718,12 @@ struct QoREstimation : public scalehls::QoREstimationBase<QoREstimation> {
     // Read configuration file.
     INIReader spec(targetSpec);
     if (spec.ParseError())
-      emitError(getOperation().getLoc(),
-                "error: target spec file parse fail, please refer to "
-                "--help option and pass in correct file path\n");
+      emitError(getOperation().getLoc(), "error: target spec file parse fail, "
+                                         "please pass in correct file path\n");

     // Collect profiling latency data.
-    auto freq = spec.Get("specification", "frequency", "100MHz");
     LatencyMap latencyMap;
-    getLatencyMap(spec, freq, latencyMap);
+    getLatencyMap(spec, latencyMap);

     // Estimate performance and resource utilization.
     for (auto func : getOperation().getOps<FuncOp>())


@ -31,58 +31,6 @@ void scalehls::getMemAccessesMap(Block &block, MemAccessesMap &map,
   }
 }
Optional<std::pair<int64_t, int64_t>>
scalehls::getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
// For now, we can only handle one result affine bound.
if (bound.getMap().getNumResults() != 1)
return Optional<std::pair<int64_t, int64_t>>();
SmallVector<int64_t, 4> lbs;
SmallVector<int64_t, 4> ubs;
for (auto operand : bound.getOperands()) {
// Only if the affine bound operands are induction variable, the calculation
// is possible.
if (!isForInductionVar(operand))
return Optional<std::pair<int64_t, int64_t>>();
// Only if the owner for op of the induction variable has constant bound,
// the calculation is possible.
auto ifOp = getForInductionVarOwner(operand);
if (!ifOp.hasConstantBounds())
return Optional<std::pair<int64_t, int64_t>>();
auto lb = ifOp.getConstantLowerBound();
auto ub = ifOp.getConstantUpperBound();
auto step = ifOp.getStep();
lbs.push_back(lb);
ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
}
// TODO: maybe a more efficient algorithm.
auto operandNum = bound.getNumOperands();
SmallVector<int64_t, 16> results;
for (unsigned i = 0, e = pow(2, operandNum); i < e; ++i) {
SmallVector<AffineExpr, 4> replacements;
for (unsigned pos = 0; pos < operandNum; ++pos) {
if (i >> pos % 2 == 0)
replacements.push_back(getAffineConstantExpr(lbs[pos], context));
else
replacements.push_back(getAffineConstantExpr(ubs[pos], context));
}
auto newExpr =
bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
results.push_back(constExpr.getValue());
else
return Optional<std::pair<int64_t, int64_t>>();
}
auto minmax = std::minmax_element(results.begin(), results.end());
return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
 // Check if the lhsOp and rhsOp is at the same scheduling level. In this
 // check, AffineIfOp is transparent.
 Optional<std::pair<Operation *, Operation *>>


@ -0,0 +1,34 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#include "Analysis/QoREstimation.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "Transforms/Passes.h"
using namespace std;
using namespace mlir;
using namespace scalehls;
using namespace hlscpp;
namespace {
struct MultipleLevelDSE : public MultipleLevelDSEBase<MultipleLevelDSE> {
void runOnOperation() override;
};
} // namespace
void MultipleLevelDSE::runOnOperation() {
// Read configuration file.
INIReader spec(targetSpec);
if (spec.ParseError())
emitError(getOperation().getLoc(), "error: target spec file parse fail, "
"please pass in correct file path\n");
// Collect profiling latency data.
LatencyMap latencyMap;
getLatencyMap(spec, latencyMap);
}
std::unique_ptr<mlir::Pass> scalehls::createMultipleLevelDSEPass() {
return std::make_unique<MultipleLevelDSE>();
}


@ -1,22 +0,0 @@
//===------------------------------------------------------------*- C++ -*-===//
//
//===----------------------------------------------------------------------===//
#include "Analysis/Utils.h"
#include "Dialect/HLSCpp/HLSCpp.h"
#include "Transforms/Passes.h"
using namespace std;
using namespace mlir;
using namespace scalehls;
using namespace hlscpp;
namespace {
struct PragmaDSE : public PragmaDSEBase<PragmaDSE> {
void runOnOperation() override {}
};
} // namespace
std::unique_ptr<mlir::Pass> scalehls::createPragmaDSEPass() {
return std::make_unique<PragmaDSE>();
}


@ -2,7 +2,6 @@
 //
 //===----------------------------------------------------------------------===//

-#include "Analysis/Utils.h"
 #include "Transforms/Passes.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/LoopUtils.h"
@ -25,6 +24,58 @@ struct RemoveVariableBound
 };
 } // namespace
static Optional<std::pair<int64_t, int64_t>>
getBoundOfAffineBound(AffineBound bound, MLIRContext *context) {
// For now, we can only handle one result affine bound.
if (bound.getMap().getNumResults() != 1)
return Optional<std::pair<int64_t, int64_t>>();
SmallVector<int64_t, 4> lbs;
SmallVector<int64_t, 4> ubs;
for (auto operand : bound.getOperands()) {
// Only if the affine bound operands are induction variable, the calculation
// is possible.
if (!isForInductionVar(operand))
return Optional<std::pair<int64_t, int64_t>>();
// Only if the owner for op of the induction variable has constant bound,
// the calculation is possible.
auto ifOp = getForInductionVarOwner(operand);
if (!ifOp.hasConstantBounds())
return Optional<std::pair<int64_t, int64_t>>();
auto lb = ifOp.getConstantLowerBound();
auto ub = ifOp.getConstantUpperBound();
auto step = ifOp.getStep();
lbs.push_back(lb);
ubs.push_back(ub - 1 - (ub - 1 - lb) % step);
}
// TODO: maybe a more efficient algorithm.
auto operandNum = bound.getNumOperands();
SmallVector<int64_t, 16> results;
for (unsigned i = 0, e = pow(2, operandNum); i < e; ++i) {
SmallVector<AffineExpr, 4> replacements;
for (unsigned pos = 0; pos < operandNum; ++pos) {
if (i >> pos % 2 == 0)
replacements.push_back(getAffineConstantExpr(lbs[pos], context));
else
replacements.push_back(getAffineConstantExpr(ubs[pos], context));
}
auto newExpr =
bound.getMap().getResult(0).replaceDimsAndSymbols(replacements, {});
if (auto constExpr = newExpr.dyn_cast<AffineConstantExpr>())
results.push_back(constExpr.getValue());
else
return Optional<std::pair<int64_t, int64_t>>();
}
auto minmax = std::minmax_element(results.begin(), results.end());
return std::pair<int64_t, int64_t>(*minmax.first, *minmax.second);
}
 /// Apply remove variable bound to all inner loops of the input loop.
 bool scalehls::applyRemoveVariableBound(AffineForOp loop, OpBuilder &builder) {
   SmallVector<AffineForOp, 4> nestedLoops;
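
The moved getBoundOfAffineBound helper evaluates a single-result affine bound at every corner of the surrounding iteration space: each operand must be an induction variable with constant bounds, the upper value used is the last value the variable actually reaches (ub - 1 - (ub - 1 - lb) % step), and the expression is evaluated at all 2^n lower/upper combinations to get its min and max. A self-contained sketch of the same corner-enumeration idea on a plain linear expression (the function name and example values are illustrative, not from the commit):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <utility>
#include <vector>

// Evaluate `expr` at every corner of the boxes [lb, ub] and return {min, max}.
static std::pair<int64_t, int64_t>
getCornerBounds(const std::vector<std::pair<int64_t, int64_t>> &ranges,
                const std::function<int64_t(const std::vector<int64_t> &)> &expr) {
  std::vector<int64_t> results;
  unsigned n = ranges.size();
  for (unsigned i = 0, e = 1u << n; i < e; ++i) {
    std::vector<int64_t> point;
    // Bit `pos` of `i` selects the lower or upper value of operand `pos`.
    for (unsigned pos = 0; pos < n; ++pos)
      point.push_back((i >> pos) & 1 ? ranges[pos].second : ranges[pos].first);
    results.push_back(expr(point));
  }
  auto minmax = std::minmax_element(results.begin(), results.end());
  return {*minmax.first, *minmax.second};
}

int main() {
  // Example: d0 in [0, 15], d1 in [0, 3], expression d0 + d1.
  auto bounds = getCornerBounds(
      {{0, 15}, {0, 3}},
      [](const std::vector<int64_t> &p) { return p[0] + p[1]; });
  printf("min=%ld max=%ld\n", (long)bounds.first, (long)bounds.second);
  return 0;
}

With lower bounds 0 and adjusted upper bounds 15 and 3, the four corners evaluate to 0, 3, 15, and 18, so the helper reports (0, 18).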


@ -0,0 +1,6 @@
// RUN: scalehls-opt -multiple-level-dse %s | FileCheck %s
// CHECK-LABEL: func @test_for
func @test_for() {
return
}