[MultipleLevelDSE] Remove the greedy DSE algorithm; [ProfileDesignSpace] move this pass to the Transforms folder due to a dependency issue; update all CMakeLists

This commit is contained in:
Hanchen Ye 2021-02-10 20:04:46 -06:00
parent 2beeff1cd5
commit d7a3456fc2
13 changed files with 52 additions and 323 deletions

View File

@ -18,7 +18,6 @@ namespace mlir {
namespace scalehls {
std::unique_ptr<Pass> createQoREstimationPass();
std::unique_ptr<Pass> createProfileDesignSpacePass();
void registerAnalysisPasses();

View File

@ -27,25 +27,4 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> {
];
}
// Declarative definition of the `profile-design-space` pass, which runs on a
// ModuleOp and is constructed through createProfileDesignSpacePass().
def ProfileDesignSpace : Pass<"profile-design-space", "ModuleOp"> {
  // The summary previously duplicated the wording of a DSE optimization pass;
  // it now matches what the description below says this pass does.
  let summary = "Profile the design space and output clock cycle and resource estimates";
  let description = [{
    This profile-design-space pass will profile the partial design space and
    output clock cycle and resource utilization estimation results.
  }];

  let constructor = "mlir::scalehls::createProfileDesignSpacePass()";

  let options = [
    // Path of the target backend specification file.
    Option<"targetSpec", "target-spec", "std::string",
           /*default=*/"\"../config/target-spec.ini\"",
           "File path: target backend specifications and configurations">,
    // Path the profiling results are written to. The default is "-".
    // NOTE(review): "-" presumably selects stdout -- confirm against the
    // output-file helper used by the pass implementation.
    Option<"profileFile", "profile-file", "std::string",
           /*default=*/"\"-\"", "File path: the output file path of profiling">,
    // Upper bound on the tiling parallelism explored while profiling.
    Option<"maxParallel", "max-parallel", "unsigned", /*default=*/"1",
           "Positive number: the maximum tiling parallelism of the profiling">
  ];
}
#endif // SCALEHLS_ANALYSIS_PASSES_TD

View File

@ -7,9 +7,7 @@
#ifndef SCALEHLS_TRANSFORMS_MULTIPLELEVELDSE_H
#define SCALEHLS_TRANSFORMS_MULTIPLELEVELDSE_H
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "scalehls/Analysis/QoREstimation.h"
#include "scalehls/Transforms/Utils.h"
namespace mlir {
namespace scalehls {
@ -22,55 +20,15 @@ class ScaleHLSOptimizer : public ScaleHLSAnalysisBase {
public:
/// Build an optimizer on top of `builder`, keeping a reference to `estimator`
/// and a copy of `numDSP`. The canonicalization patterns of every operation
/// registered in the builder's context are gathered into `patterns`.
explicit ScaleHLSOptimizer(Builder &builder, ScaleHLSEstimator &estimator,
                           int64_t numDSP)
    : ScaleHLSAnalysisBase(builder), estimator(estimator), numDSP(numDSP) {
  // TODO: only insert affine-related patterns.
  auto *context = builder.getContext();

  // Accumulate into an owning list first, then hand it over to the member.
  OwningRewritePatternList canonicalizations;
  for (auto *registeredOp : context->getRegisteredOperations())
    registeredOp->getCanonicalizationPatterns(canonicalizations, context);

  patterns = std::move(canonicalizations);
}
enum LoopState { HOT = 0, COLD = 1, FROZEN = 2 };
using BandState = SmallVector<LoopState, 8>;
bool loopBandIsFrozen(BandState bandState) {
for (auto loopState : bandState)
if (loopState != LoopState::FROZEN)
return false;
return true;
}
bool loopBandIsColdOrFrozen(BandState bandState) {
for (auto loopState : bandState)
if (loopState == LoopState::HOT)
return false;
return true;
}
bool loopBandIsOneHot(BandState bandState) {
unsigned hotNum = 0;
for (auto loopState : bandState)
if (loopState == LoopState::HOT)
hotNum++;
if (hotNum == 1)
return true;
else
return false;
}
: ScaleHLSAnalysisBase(builder), estimator(estimator), numDSP(numDSP) {}
void emitDebugInfo(FuncOp targetFunc, StringRef message);
void emitTilingInfo(FuncOp targetFunc, ArrayRef<TileSizes> tileSizesList);
bool incrTileSizeAtLoc(TileSizes &tileSizes, TileSizes &tripCounts,
unsigned &loc);
/// This is a temporary approach that does not scale.
void applyMultipleLevelDSE(FuncOp func);
ScaleHLSEstimator &estimator;
int64_t numDSP;
FrozenRewritePatternList patterns;
};
} // namespace scalehls

View File

@ -19,6 +19,7 @@ namespace scalehls {
/// Design space exploration pass.
std::unique_ptr<Pass> createMultipleLevelDSEPass();
std::unique_ptr<Pass> createProfileDesignSpacePass();
/// Dataflow optimization passes.
std::unique_ptr<Pass> createLegalizeDataflowPass();

View File

@ -32,6 +32,26 @@ def MultipleLevelDSE : Pass<"multiple-level-dse", "ModuleOp"> {
];
}
// Declarative definition of the `profile-design-space` pass, which runs on a
// ModuleOp and is constructed through createProfileDesignSpacePass().
def ProfileDesignSpace : Pass<"profile-design-space", "ModuleOp"> {
  // The summary previously duplicated the wording of a DSE optimization pass;
  // it now matches what the description below says this pass does.
  let summary = "Profile the design space and output clock cycle and resource estimates";
  let description = [{
    This profile-design-space pass will profile the partial design space and
    output clock cycle and resource utilization estimation results.
  }];

  let constructor = "mlir::scalehls::createProfileDesignSpacePass()";

  let options = [
    // Path of the target backend specification file.
    Option<"targetSpec", "target-spec", "std::string",
           /*default=*/"\"../config/target-spec.ini\"",
           "File path: target backend specifications and configurations">,
    // Path the profiling results are written to. The default is "-".
    // NOTE(review): "-" presumably selects stdout -- confirm against the
    // output-file helper used by the pass implementation.
    Option<"outputFile", "output-file", "std::string",
           /*default=*/"\"-\"", "File path: the output file path of profiling">,
    // Upper bound on the tiling parallelism explored while profiling.
    Option<"maxParallel", "max-parallel", "unsigned", /*default=*/"1",
           "Positive number: the maximum tiling parallelism of the profiling">
  ];
}
//===----------------------------------------------------------------------===//
// Dataflow Optimization Passes
//===----------------------------------------------------------------------===//

View File

@ -5,8 +5,4 @@ add_mlir_library(MLIRScaleHLSAnalysis
DEPENDS
MLIRScaleHLSAnalysisIncGen
LINK_LIBS PUBLIC
MLIRHLSCpp
MLIRHLSKernel
)

View File

@ -5,8 +5,4 @@ add_mlir_library(MLIRScaleHLSConversion
DEPENDS
MLIRScaleHLSConversionIncGen
LINK_LIBS PUBLIC
MLIRHLSCpp
MLIRHLSKernel
)

View File

@ -5,8 +5,4 @@ add_mlir_library(MLIRScaleHLSTransforms
DEPENDS
MLIRScaleHLSTransformsIncGen
LINK_LIBS PUBLIC
MLIRHLSCpp
MLIRHLSKernel
)

View File

@ -47,48 +47,8 @@ void ScaleHLSOptimizer::emitDebugInfo(FuncOp targetFunc, StringRef message) {
<< ", DSP utilization is " << Twine(dsp) << ".\n\n";);
}
/// Re-estimate `targetFunc`, print the tile sizes of every loop band to the
/// debug stream, and then emit the generic debug report for the function.
void ScaleHLSOptimizer::emitTilingInfo(FuncOp targetFunc,
                                       ArrayRef<TileSizes> tileSizesList) {
  // Estimate performance and resource utilization.
  estimator.estimateFunc(targetFunc);

  // One line per loop band: "Loop band <index>: <size> <size> ...".
  LLVM_DEBUG({
    auto &os = llvm::dbgs();
    os << "Current tiling strategy:\n";

    unsigned bandIdx = 0;
    for (const auto &bandTileSizes : tileSizesList) {
      os << "Loop band " << Twine(bandIdx++) << ":";
      for (auto tileSize : bandTileSizes)
        os << " " << Twine(tileSize);
      os << "\n";
    }
  });

  emitDebugInfo(targetFunc, "Apply loop tiling and pipelining, generic IR "
                            "opts, and array partition.");
}
/// Enlarge the tile size at position `loc` to the next larger value that
/// still evenly divides the trip count at the same position.
///
/// Returns false and leaves `tileSizes` untouched when the current tile size
/// is smaller than 1, is already greater than or equal to the trip count, or
/// does not divide the trip count. Otherwise `tileSizes[loc]` is multiplied by
/// the smallest factor >= 2 that keeps it a divisor of the trip count, and
/// true is returned.
///
/// NOTE(review): `loc` is not range-checked here; callers are assumed to pass
/// a valid index into both `tileSizes` and `tripCounts`.
bool ScaleHLSOptimizer::incrTileSizeAtLoc(TileSizes &tileSizes,
                                          TileSizes &tripCounts,
                                          unsigned &loc) {
  auto size = tileSizes[loc];
  auto tripCount = tripCounts[loc];

  // Reject tile sizes below 1 up front: a zero size would otherwise reach the
  // modulo below and divide by zero.
  if (size < 1 || size >= tripCount || tripCount % size != 0)
    return false;

  // Find the minimum factor that can be applied. The search terminates because
  // `size` divides `tripCount`, so factor == tripCount / size always succeeds.
  unsigned factor = 2;
  while (tripCount % (size * factor) != 0)
    factor++;

  // Increase and update tile size.
  size *= factor;
  tileSizes[loc] = size;
  return true;
}
/// This is a temporary approach that does not scale.
void ScaleHLSOptimizer::applyMultipleLevelDSE(FuncOp func) {
// Canonicalize the function and start the dse.
applyPatternsAndFoldGreedily(func, patterns);
estimator.estimateFunc(func);
if (getIntAttrValue(func, "dsp") > numDSP)
return;
@ -229,187 +189,6 @@ void ScaleHLSOptimizer::applyMultipleLevelDSE(FuncOp func) {
//===--------------------------------------------------------------------===//
// STAGE 3: Loop Bands Tiling and Finalization
//===--------------------------------------------------------------------===//
// Hold trip counts of all loops in each loop band, this can also be
// considered as maxTileSizesList.
std::vector<TileSizes> tripCountsList;
// Hold the loop number in each loop band.
SmallVector<unsigned, 8> loopNumList;
// Hold the current tiling sizes of each loop band. This is the main design
// vector which will evolve in the procedure of DSE.
std::vector<TileSizes> tileSizesList;
std::vector<int64_t> targetIIList;
// Hold the DSE status of all loops in each loop band.
std::vector<BandState> BandStateList;
// Initialize all lists.
for (auto band : targetBands) {
TileSizes tripCounts;
for (auto loop : band)
tripCounts.push_back(getIntAttrValue(loop, "trip_count"));
// These two lists will not be modified in the DSE.
tripCountsList.push_back(tripCounts);
loopNumList.push_back(band.size());
// These two lists will evolve in the DSE.
tileSizesList.push_back(TileSizes(band.size(), 1));
targetIIList.push_back(1);
BandStateList.push_back(BandState(band.size(), LoopState::COLD));
}
// Try and record the none tiling performance.
auto nonTileFunc = func.clone();
applyOptStrategy(nonTileFunc, tileSizesList, targetIIList);
emitTilingInfo(func, tileSizesList);
unsigned minLatency = getIntAttrValue(nonTileFunc, "latency");
if (getIntAttrValue(nonTileFunc, "dsp") > numDSP)
return;
nonTileFunc.erase();
LLVM_DEBUG(llvm::dbgs() << "3. Search for the best tiling strategy.\n";);
// Main loop for design space exploration.
unsigned iteration = 0;
while (true) {
LLVM_DEBUG(llvm::dbgs() << "Iteration " << iteration++ << ":\n\n";);
bool isAllFrozen = true;
// Walk through each target loop band.
for (unsigned i = 0; i < targetNum; ++i) {
auto &bandState = BandStateList[i];
// Update state of the current loop band.
for (unsigned loc = 0; loc < loopNumList[i]; ++loc)
if (tileSizesList[i][loc] >= tripCountsList[i][loc])
bandState[loc] = LoopState::FROZEN;
// If all loop in the current loop band are frozen, continue and visit
// next loop band.
if (loopBandIsFrozen(bandState))
continue;
isAllFrozen = false;
// If all loop in the current loop band are cold or frozen, walk through
// all loop levels and heat the best one to hot state.
if (loopBandIsColdOrFrozen(bandState)) {
unsigned bestLoc = 0;
unsigned bestLatency = UINT_MAX;
for (unsigned loc = 0; loc < loopNumList[i]; ++loc) {
if (bandState[loc] == LoopState::FROZEN)
continue;
// Increase the tile size of current location.
auto tmpTileSizesList = tileSizesList;
if (incrTileSizeAtLoc(tmpTileSizesList[i], tripCountsList[i], loc)) {
// Try to apply the new tile size.
auto tmpFunc = func.clone();
if (applyOptStrategy(tmpFunc, tmpTileSizesList, targetIIList)) {
emitTilingInfo(tmpFunc, tmpTileSizesList);
auto latency = getIntAttrValue(tmpFunc, "latency");
auto dsp = getIntAttrValue(tmpFunc, "dsp");
if (dsp < numDSP && latency < bestLatency * 0.95) {
bestLoc = loc;
bestLatency = latency;
}
// Move to the next location.
continue;
}
}
// If the current loop cannot be further tiled, set it as frozen.
bandState[loc] = LoopState::FROZEN;
}
if (bestLatency != UINT_MAX) {
// Heat the best loop location. If the best latency is already better
// than the minimum found latency, apply it. Otherwise, only heat the
// location.
bandState[bestLoc] = LoopState::HOT;
if (bestLatency < minLatency * 0.95) {
incrTileSizeAtLoc(tileSizesList[i], tripCountsList[i], bestLoc);
minLatency = bestLatency;
}
} else {
// If cannot find a proper tiling strategy for the current loop band,
// frozen all loops.
for (unsigned loc = 0; loc < loopNumList[i]; ++loc)
bandState[loc] = LoopState::FROZEN;
}
// Move to the next DSE iteration.
continue;
}
// For now, there should only one loop locations are in HOT state.
if (loopBandIsOneHot(bandState)) {
unsigned hotLoc = 0;
for (unsigned loc = 0; loc < loopNumList[i]; ++loc)
if (bandState[loc] == LoopState::HOT)
hotLoc = loc;
unsigned lastLatency = minLatency;
unsigned tolerantCounter = 0;
// Increase the tile size of current location until the latency is
// improved or tile size cannot be further increased.
auto tmpTileSizesList = tileSizesList;
while (true) {
// If the latency has not been improved for more than a certain
// number of iterations, stop to increase tile size.
if (tolerantCounter > 1) {
bandState[hotLoc] = LoopState::FROZEN;
break;
}
// Try to increase the tile size.
if (incrTileSizeAtLoc(tmpTileSizesList[i], tripCountsList[i],
hotLoc)) {
// Try to apply the new tile size.
auto tmpFunc = func.clone();
if (applyOptStrategy(tmpFunc, tmpTileSizesList, targetIIList)) {
emitTilingInfo(tmpFunc, tmpTileSizesList);
auto latency = getIntAttrValue(tmpFunc, "latency");
auto dsp = getIntAttrValue(tmpFunc, "dsp");
if (dsp < numDSP && latency < minLatency * 0.95) {
// If find a new minimum latency, apply it.
tileSizesList = tmpTileSizesList;
minLatency = latency;
break;
} else if (dsp < numDSP && latency < lastLatency * 0.95) {
// If the latency is better than the last iteration, even if it
// is not the minimum latency, continue to try on the hot loop
// location.
lastLatency = latency;
tolerantCounter = 0;
continue;
} else {
// If the latency is worse than the last iteration, increase the
// tolerant counter by 1 and continue to
lastLatency = latency;
tolerantCounter++;
continue;
}
}
}
// If the hot location cannot contribute to the improvement of
// latency, set it as frozen.
bandState[hotLoc] = LoopState::FROZEN;
break;
}
}
}
if (isAllFrozen)
break;
}
// Finally, we found the best tiling strategy.
LLVM_DEBUG(llvm::dbgs() << "4. Apply the best tiling strategy.\n";);
applyOptStrategy(func, tileSizesList, targetIIList);
emitTilingInfo(func, tileSizesList);
}
namespace {

View File

@ -6,8 +6,8 @@
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Support/FileUtilities.h"
#include "scalehls/Analysis/Passes.h"
#include "scalehls/Analysis/QoREstimation.h"
#include "scalehls/Transforms/Passes.h"
#include "scalehls/Transforms/Utils.h"
#include "llvm/Support/ToolOutputFile.h"
@ -185,7 +185,7 @@ struct ProfileDesignSpace : public ProfileDesignSpaceBase<ProfileDesignSpace> {
if (auto topFunction = func->getAttrOfType<BoolAttr>("top_function"))
if (topFunction.getValue()) {
std::string errorMessage;
auto output = mlir::openOutputFile(profileFile, &errorMessage);
auto output = mlir::openOutputFile(outputFile, &errorMessage);
if (!output)
emitError(module.getLoc(), errorMessage);

View File

@ -1,12 +1,14 @@
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
set(LIBS
add_llvm_tool(benchmark-gen
benchmark-gen.cpp
)
llvm_update_compile_flags(benchmark-gen)
target_link_libraries(benchmark-gen
PRIVATE
${dialect_libs}
MLIRHLSKernel
)
add_llvm_executable(benchmark-gen benchmark-gen.cpp)
llvm_update_compile_flags(benchmark-gen)
target_link_libraries(benchmark-gen PRIVATE ${LIBS})

View File

@ -1,10 +1,16 @@
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
set(LIBS
add_llvm_tool(scalehls-opt
scalehls-opt.cpp
)
llvm_update_compile_flags(scalehls-opt)
target_link_libraries(scalehls-opt
PRIVATE
${dialect_libs}
${conversion_libs}
Threads::Threads
MLIROptLib
MLIRHLSCpp
@ -12,9 +18,6 @@ set(LIBS
MLIRScaleHLSConversion
MLIRScaleHLSTransforms
MLIRScaleHLSAnalysis
Threads::Threads
)
add_llvm_executable(scalehls-opt scalehls-opt.cpp)
llvm_update_compile_flags(scalehls-opt)
target_link_libraries(scalehls-opt PRIVATE ${LIBS})

View File

@ -1,21 +1,21 @@
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)
set(LLVM_LINK_COMPONENTS
Support
)
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)
add_llvm_tool(scalehls-translate
scalehls-translate.cpp
)
set(LIBS
llvm_update_compile_flags(scalehls-translate)
target_link_libraries(scalehls-translate
PRIVATE
${dialect_libs}
${translation_libs}
MLIRScaleHLSEmitHLSCpp
MLIRScaleHLSAnalysis
)
add_llvm_executable(scalehls-translate scalehls-translate.cpp)
llvm_update_compile_flags(scalehls-translate)
target_link_libraries(scalehls-translate PRIVATE ${LIBS})
mlir_check_link_libraries(scalehls-translate)