diff --git a/include/scalehls/Analysis/Passes.h b/include/scalehls/Analysis/Passes.h index 387e446..9e9c4a7 100644 --- a/include/scalehls/Analysis/Passes.h +++ b/include/scalehls/Analysis/Passes.h @@ -18,7 +18,6 @@ namespace mlir { namespace scalehls { std::unique_ptr createQoREstimationPass(); -std::unique_ptr createProfileDesignSpacePass(); void registerAnalysisPasses(); diff --git a/include/scalehls/Analysis/Passes.td b/include/scalehls/Analysis/Passes.td index 034553f..aec3cb1 100644 --- a/include/scalehls/Analysis/Passes.td +++ b/include/scalehls/Analysis/Passes.td @@ -27,25 +27,4 @@ def QoREstimation : Pass<"qor-estimation", "ModuleOp"> { ]; } -def ProfileDesignSpace : Pass<"profile-design-space", "ModuleOp"> { - let summary = "Optimize HLS design at multiple abstraction level"; - let description = [{ - This profile-design-space pass will profile the partial design space and - output clock cycle and resource utilization estimation results. - }]; - - let constructor = "mlir::scalehls::createProfileDesignSpacePass()"; - - let options = [ - Option<"targetSpec", "target-spec", "std::string", - /*default=*/"\"../config/target-spec.ini\"", - "File path: target backend specifications and configurations">, - Option<"profileFile", "profile-file", "std::string", - /*default=*/"\"-\"", "File path: the output file path of profiling">, - Option<"maxParallel", "max-parallel", "unsigned", /*default=*/"1", - "Positive number: the maximum tiling parallelism of the profiling"> - ]; -} - - #endif // SCALEHLS_ANALYSIS_PASSES_TD diff --git a/include/scalehls/Transforms/MultipleLevelDSE.h b/include/scalehls/Transforms/MultipleLevelDSE.h index 5b6ad12..6eaec3d 100644 --- a/include/scalehls/Transforms/MultipleLevelDSE.h +++ b/include/scalehls/Transforms/MultipleLevelDSE.h @@ -7,9 +7,7 @@ #ifndef SCALEHLS_TRANSFORMS_MULTIPLELEVELDSE_H #define SCALEHLS_TRANSFORMS_MULTIPLELEVELDSE_H -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "scalehls/Analysis/QoREstimation.h" -#include "scalehls/Transforms/Utils.h" namespace mlir { namespace scalehls { @@ -22,55 +20,15 @@ class ScaleHLSOptimizer : public ScaleHLSAnalysisBase { public: explicit ScaleHLSOptimizer(Builder &builder, ScaleHLSEstimator &estimator, int64_t numDSP) - : ScaleHLSAnalysisBase(builder), estimator(estimator), numDSP(numDSP) { - // TODO: only insert affine-related patterns. - OwningRewritePatternList owningPatterns; - for (auto *op : builder.getContext()->getRegisteredOperations()) - op->getCanonicalizationPatterns(owningPatterns, builder.getContext()); - patterns = std::move(owningPatterns); - } - - enum LoopState { HOT = 0, COLD = 1, FROZEN = 2 }; - using BandState = SmallVector; - - bool loopBandIsFrozen(BandState bandState) { - for (auto loopState : bandState) - if (loopState != LoopState::FROZEN) - return false; - return true; - } - - bool loopBandIsColdOrFrozen(BandState bandState) { - for (auto loopState : bandState) - if (loopState == LoopState::HOT) - return false; - return true; - } - - bool loopBandIsOneHot(BandState bandState) { - unsigned hotNum = 0; - for (auto loopState : bandState) - if (loopState == LoopState::HOT) - hotNum++; - - if (hotNum == 1) - return true; - else - return false; - } + : ScaleHLSAnalysisBase(builder), estimator(estimator), numDSP(numDSP) {} void emitDebugInfo(FuncOp targetFunc, StringRef message); - void emitTilingInfo(FuncOp targetFunc, ArrayRef tileSizesList); - - bool incrTileSizeAtLoc(TileSizes &tileSizes, TileSizes &tripCounts, - unsigned &loc); /// This is a temporary approach that does not scale. void applyMultipleLevelDSE(FuncOp func); ScaleHLSEstimator &estimator; int64_t numDSP; - FrozenRewritePatternList patterns; }; } // namespace scalehls diff --git a/include/scalehls/Transforms/Passes.h b/include/scalehls/Transforms/Passes.h index ea95823..23808b1 100644 --- a/include/scalehls/Transforms/Passes.h +++ b/include/scalehls/Transforms/Passes.h @@ -19,6 +19,7 @@ namespace scalehls { /// Design space exploration pass. std::unique_ptr createMultipleLevelDSEPass(); +std::unique_ptr createProfileDesignSpacePass(); /// Dataflow optimization passes. std::unique_ptr createLegalizeDataflowPass(); diff --git a/include/scalehls/Transforms/Passes.td b/include/scalehls/Transforms/Passes.td index 5ab7089..a46763d 100644 --- a/include/scalehls/Transforms/Passes.td +++ b/include/scalehls/Transforms/Passes.td @@ -32,6 +32,26 @@ def MultipleLevelDSE : Pass<"multiple-level-dse", "ModuleOp"> { ]; } +def ProfileDesignSpace : Pass<"profile-design-space", "ModuleOp"> { + let summary = "Optimize HLS design at multiple abstraction level"; + let description = [{ + This profile-design-space pass will profile the partial design space and + output clock cycle and resource utilization estimation results. + }]; + + let constructor = "mlir::scalehls::createProfileDesignSpacePass()"; + + let options = [ + Option<"targetSpec", "target-spec", "std::string", + /*default=*/"\"../config/target-spec.ini\"", + "File path: target backend specifications and configurations">, + Option<"outputFile", "output-file", "std::string", + /*default=*/"\"-\"", "File path: the output file path of profiling">, + Option<"maxParallel", "max-parallel", "unsigned", /*default=*/"1", + "Positive number: the maximum tiling parallelism of the profiling"> + ]; +} + //===----------------------------------------------------------------------===// // Dataflow Optimization Passes //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 567d311..f31535f 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -5,8 +5,4 @@ add_mlir_library(MLIRScaleHLSAnalysis DEPENDS MLIRScaleHLSAnalysisIncGen - - LINK_LIBS PUBLIC - MLIRHLSCpp - MLIRHLSKernel ) diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index 5ac9caa..69ad403 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -5,8 +5,4 @@ add_mlir_library(MLIRScaleHLSConversion DEPENDS MLIRScaleHLSConversionIncGen - - LINK_LIBS PUBLIC - MLIRHLSCpp - MLIRHLSKernel ) diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt index bb3b6bc..5245378 100644 --- a/lib/Transforms/CMakeLists.txt +++ b/lib/Transforms/CMakeLists.txt @@ -5,8 +5,4 @@ add_mlir_library(MLIRScaleHLSTransforms DEPENDS MLIRScaleHLSTransformsIncGen - - LINK_LIBS PUBLIC - MLIRHLSCpp - MLIRHLSKernel ) diff --git a/lib/Transforms/MultipleLevelDSE.cpp b/lib/Transforms/MultipleLevelDSE.cpp index c68aaea..7914c5f 100644 --- a/lib/Transforms/MultipleLevelDSE.cpp +++ b/lib/Transforms/MultipleLevelDSE.cpp @@ -47,48 +47,8 @@ void ScaleHLSOptimizer::emitDebugInfo(FuncOp targetFunc, StringRef message) { << ", DSP utilization is " << Twine(dsp) << ".\n\n";); } -void ScaleHLSOptimizer::emitTilingInfo(FuncOp targetFunc, - ArrayRef tileSizesList) { - // Estimate performance and resource utilization. - estimator.estimateFunc(targetFunc); - LLVM_DEBUG(llvm::dbgs() << "Current tiling strategy:\n"; - for (unsigned idx = 0; idx < tileSizesList.size(); ++idx) { - auto tileSizes = tileSizesList[idx]; - llvm::dbgs() << "Loop band " << Twine(idx) << ":"; - - for (auto size : tileSizes) - llvm::dbgs() << " " << Twine(size); - llvm::dbgs() << "\n"; - }); - - emitDebugInfo(targetFunc, "Apply loop tiling and pipelining, generic IR " - "opts, and array partition."); -} - -bool ScaleHLSOptimizer::incrTileSizeAtLoc(TileSizes &tileSizes, - TileSizes &tripCounts, - unsigned &loc) { - auto size = tileSizes[loc]; - auto tripCount = tripCounts[loc]; - - if (size >= tripCount || tripCount % size != 0) - return false; - - // Fine the minimum factor that can be applied. - unsigned factor = 2; - while (tripCount % (size * factor) != 0) - factor++; - - // Increase and update tile size. - size *= factor; - tileSizes[loc] = size; - return true; -} - /// This is a temporary approach that does not scale. void ScaleHLSOptimizer::applyMultipleLevelDSE(FuncOp func) { - // Canonicalize the function and start the dse. - applyPatternsAndFoldGreedily(func, patterns); estimator.estimateFunc(func); if (getIntAttrValue(func, "dsp") > numDSP) return; @@ -229,187 +189,6 @@ void ScaleHLSOptimizer::applyMultipleLevelDSE(FuncOp func) { //===--------------------------------------------------------------------===// // STAGE 3: Loop Bands Tiling and Finalization //===--------------------------------------------------------------------===// - - // Hold trip counts of all loops in each loop band, this can also be - // considered as maxTileSizesList. - std::vector tripCountsList; - // Hold the loop number in each loop band. - SmallVector loopNumList; - - // Hold the current tiling sizes of each loop band. This is the main design - // vector which will evolve in the procedure of DSE. - std::vector tileSizesList; - std::vector targetIIList; - // Hold the DSE status of all loops in each loop band. - std::vector BandStateList; - - // Initialize all lists. - for (auto band : targetBands) { - TileSizes tripCounts; - for (auto loop : band) - tripCounts.push_back(getIntAttrValue(loop, "trip_count")); - - // These two lists will not be modified in the DSE. - tripCountsList.push_back(tripCounts); - loopNumList.push_back(band.size()); - - // These two lists will evolve in the DSE. - tileSizesList.push_back(TileSizes(band.size(), 1)); - targetIIList.push_back(1); - BandStateList.push_back(BandState(band.size(), LoopState::COLD)); - } - - // Try and record the none tiling performance. - auto nonTileFunc = func.clone(); - applyOptStrategy(nonTileFunc, tileSizesList, targetIIList); - emitTilingInfo(func, tileSizesList); - unsigned minLatency = getIntAttrValue(nonTileFunc, "latency"); - - if (getIntAttrValue(nonTileFunc, "dsp") > numDSP) - return; - nonTileFunc.erase(); - LLVM_DEBUG(llvm::dbgs() << "3. Search for the best tiling strategy.\n";); - - // Main loop for design space exploration. - unsigned iteration = 0; - while (true) { - LLVM_DEBUG(llvm::dbgs() << "Iteration " << iteration++ << ":\n\n";); - bool isAllFrozen = true; - // Walk through each target loop band. - for (unsigned i = 0; i < targetNum; ++i) { - auto &bandState = BandStateList[i]; - - // Update state of the current loop band. - for (unsigned loc = 0; loc < loopNumList[i]; ++loc) - if (tileSizesList[i][loc] >= tripCountsList[i][loc]) - bandState[loc] = LoopState::FROZEN; - - // If all loop in the current loop band are frozen, continue and visit - // next loop band. - if (loopBandIsFrozen(bandState)) - continue; - isAllFrozen = false; - - // If all loop in the current loop band are cold or frozen, walk through - // all loop levels and heat the best one to hot state. - if (loopBandIsColdOrFrozen(bandState)) { - unsigned bestLoc = 0; - unsigned bestLatency = UINT_MAX; - - for (unsigned loc = 0; loc < loopNumList[i]; ++loc) { - if (bandState[loc] == LoopState::FROZEN) - continue; - - // Increase the tile size of current location. - auto tmpTileSizesList = tileSizesList; - if (incrTileSizeAtLoc(tmpTileSizesList[i], tripCountsList[i], loc)) { - // Try to apply the new tile size. - auto tmpFunc = func.clone(); - if (applyOptStrategy(tmpFunc, tmpTileSizesList, targetIIList)) { - emitTilingInfo(tmpFunc, tmpTileSizesList); - auto latency = getIntAttrValue(tmpFunc, "latency"); - auto dsp = getIntAttrValue(tmpFunc, "dsp"); - - if (dsp < numDSP && latency < bestLatency * 0.95) { - bestLoc = loc; - bestLatency = latency; - } - // Move to the next location. - continue; - } - } - - // If the current loop cannot be further tiled, set it as frozen. - bandState[loc] = LoopState::FROZEN; - } - - if (bestLatency != UINT_MAX) { - // Heat the best loop location. If the best latency is already better - // than the minimum found latency, apply it. Otherwise, only heat the - // location. - bandState[bestLoc] = LoopState::HOT; - if (bestLatency < minLatency * 0.95) { - incrTileSizeAtLoc(tileSizesList[i], tripCountsList[i], bestLoc); - minLatency = bestLatency; - } - } else { - // If cannot find a proper tiling strategy for the current loop band, - // frozen all loops. - for (unsigned loc = 0; loc < loopNumList[i]; ++loc) - bandState[loc] = LoopState::FROZEN; - } - // Move to the next DSE iteration. - continue; - } - - // For now, there should only one loop locations are in HOT state. - if (loopBandIsOneHot(bandState)) { - unsigned hotLoc = 0; - for (unsigned loc = 0; loc < loopNumList[i]; ++loc) - if (bandState[loc] == LoopState::HOT) - hotLoc = loc; - - unsigned lastLatency = minLatency; - unsigned tolerantCounter = 0; - - // Increase the tile size of current location until the latency is - // improved or tile size cannot be further increased. - auto tmpTileSizesList = tileSizesList; - while (true) { - // If the latency has not been improved for more than a certain - // number of iterations, stop to increase tile size. - if (tolerantCounter > 1) { - bandState[hotLoc] = LoopState::FROZEN; - break; - } - - // Try to increase the tile size. - if (incrTileSizeAtLoc(tmpTileSizesList[i], tripCountsList[i], - hotLoc)) { - // Try to apply the new tile size. - auto tmpFunc = func.clone(); - if (applyOptStrategy(tmpFunc, tmpTileSizesList, targetIIList)) { - emitTilingInfo(tmpFunc, tmpTileSizesList); - auto latency = getIntAttrValue(tmpFunc, "latency"); - auto dsp = getIntAttrValue(tmpFunc, "dsp"); - - if (dsp < numDSP && latency < minLatency * 0.95) { - // If find a new minimum latency, apply it. - tileSizesList = tmpTileSizesList; - minLatency = latency; - break; - } else if (dsp < numDSP && latency < lastLatency * 0.95) { - // If the latency is better than the last iteration, even if it - // is not the minimum latency, continue to try on the hot loop - // location. - lastLatency = latency; - tolerantCounter = 0; - continue; - } else { - // If the latency is worse than the last iteration, increase the - // tolerant counter by 1 and continue to - lastLatency = latency; - tolerantCounter++; - continue; - } - } - } - - // If the hot location cannot contribute to the improvement of - // latency, set it as frozen. - bandState[hotLoc] = LoopState::FROZEN; - break; - } - } - } - if (isAllFrozen) - break; - } - - // Finally, we found the best tiling strategy. - LLVM_DEBUG(llvm::dbgs() << "4. Apply the best tiling strategy.\n";); - applyOptStrategy(func, tileSizesList, targetIIList); - emitTilingInfo(func, tileSizesList); } namespace { diff --git a/lib/Analysis/ProfileDesignSpace.cpp b/lib/Transforms/ProfileDesignSpace.cpp similarity index 98% rename from lib/Analysis/ProfileDesignSpace.cpp rename to lib/Transforms/ProfileDesignSpace.cpp index 75c385a..24bd212 100644 --- a/lib/Analysis/ProfileDesignSpace.cpp +++ b/lib/Transforms/ProfileDesignSpace.cpp @@ -6,8 +6,8 @@ #include "mlir/Analysis/LoopAnalysis.h" #include "mlir/Support/FileUtilities.h" -#include "scalehls/Analysis/Passes.h" #include "scalehls/Analysis/QoREstimation.h" +#include "scalehls/Transforms/Passes.h" #include "scalehls/Transforms/Utils.h" #include "llvm/Support/ToolOutputFile.h" @@ -185,7 +185,7 @@ struct ProfileDesignSpace : public ProfileDesignSpaceBase { if (auto topFunction = func->getAttrOfType("top_function")) if (topFunction.getValue()) { std::string errorMessage; - auto output = mlir::openOutputFile(profileFile, &errorMessage); + auto output = mlir::openOutputFile(outputFile, &errorMessage); if (!output) emitError(module.getLoc(), errorMessage); diff --git a/tools/benchmark-gen/CMakeLists.txt b/tools/benchmark-gen/CMakeLists.txt index 960b9f7..426684d 100644 --- a/tools/benchmark-gen/CMakeLists.txt +++ b/tools/benchmark-gen/CMakeLists.txt @@ -1,12 +1,14 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) -set(LIBS +add_llvm_tool(benchmark-gen + benchmark-gen.cpp + ) + +llvm_update_compile_flags(benchmark-gen) + +target_link_libraries(benchmark-gen + PRIVATE ${dialect_libs} MLIRHLSKernel ) - -add_llvm_executable(benchmark-gen benchmark-gen.cpp) - -llvm_update_compile_flags(benchmark-gen) -target_link_libraries(benchmark-gen PRIVATE ${LIBS}) diff --git a/tools/scalehls-opt/CMakeLists.txt b/tools/scalehls-opt/CMakeLists.txt index d9b6994..0ccfce9 100644 --- a/tools/scalehls-opt/CMakeLists.txt +++ b/tools/scalehls-opt/CMakeLists.txt @@ -1,10 +1,16 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) -set(LIBS +add_llvm_tool(scalehls-opt + scalehls-opt.cpp + ) + +llvm_update_compile_flags(scalehls-opt) + +target_link_libraries(scalehls-opt + PRIVATE ${dialect_libs} ${conversion_libs} - Threads::Threads MLIROptLib MLIRHLSCpp @@ -12,9 +18,6 @@ set(LIBS MLIRScaleHLSConversion MLIRScaleHLSTransforms MLIRScaleHLSAnalysis + + Threads::Threads ) - -add_llvm_executable(scalehls-opt scalehls-opt.cpp) - -llvm_update_compile_flags(scalehls-opt) -target_link_libraries(scalehls-opt PRIVATE ${LIBS}) diff --git a/tools/scalehls-translate/CMakeLists.txt b/tools/scalehls-translate/CMakeLists.txt index b40be98..4e7304f 100644 --- a/tools/scalehls-translate/CMakeLists.txt +++ b/tools/scalehls-translate/CMakeLists.txt @@ -1,21 +1,21 @@ +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) + set(LLVM_LINK_COMPONENTS Support ) -get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) -get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) +add_llvm_tool(scalehls-translate + scalehls-translate.cpp + ) -set(LIBS +llvm_update_compile_flags(scalehls-translate) + +target_link_libraries(scalehls-translate + PRIVATE ${dialect_libs} ${translation_libs} MLIRScaleHLSEmitHLSCpp MLIRScaleHLSAnalysis ) - -add_llvm_executable(scalehls-translate scalehls-translate.cpp) - -llvm_update_compile_flags(scalehls-translate) -target_link_libraries(scalehls-translate PRIVATE ${LIBS}) - -mlir_check_link_libraries(scalehls-translate)