From 10252043d4fa03a0cbb8088db19e98fe0ded52f7 Mon Sep 17 00:00:00 2001 From: Hanchen Ye Date: Mon, 28 Feb 2022 18:26:37 -0600 Subject: [PATCH] [FakeQuantize] Implement this pass; Add fake-quantize option to the pytorch pipeline --- include/scalehls/Transforms/Passes.h | 5 ++ include/scalehls/Transforms/Passes.td | 6 ++ lib/Transforms/CMakeLists.txt | 1 + lib/Transforms/Graph/FakeQuantize.cpp | 82 +++++++++++++++++++ lib/Transforms/Loop/AffineLoopOrderOpt.cpp | 1 + lib/Transforms/Loop/AffineLoopPerfection.cpp | 2 + .../Loop/AffineLoopUnrollAndPipeline.cpp | 29 ++++--- lib/Transforms/Loop/RemoveVariableBound.cpp | 2 + lib/Transforms/Passes.cpp | 9 +- 9 files changed, 125 insertions(+), 12 deletions(-) create mode 100644 lib/Transforms/Graph/FakeQuantize.cpp diff --git a/include/scalehls/Transforms/Passes.h b/include/scalehls/Transforms/Passes.h index 20510ca..3784398 100644 --- a/include/scalehls/Transforms/Passes.h +++ b/include/scalehls/Transforms/Passes.h @@ -38,6 +38,10 @@ struct ScaleHLSPyTorchPipelineOptions Option vectorSize{ *this, "vector-size", llvm::cl::desc("The size of vectorization (set 0 to disable)")}; + + Option fakeQuantize{ + *this, "fake-quantize", llvm::cl::init(false), + llvm::cl::desc("Trigger the fake quantization (just for testing use)")}; }; /// QoR estimation and DSE passes. @@ -45,6 +49,7 @@ std::unique_ptr createQoREstimationPass(); std::unique_ptr createMultipleLevelDSEPass(); /// Graph optimization passes. 
+std::unique_ptr createFakeQuantizePass(); std::unique_ptr createCreateRuntimeMainPass(); std::unique_ptr createCreateRuntimeMainPass(const ScaleHLSPyTorchPipelineOptions &opts); diff --git a/include/scalehls/Transforms/Passes.td b/include/scalehls/Transforms/Passes.td index a873611..9dd03c1 100644 --- a/include/scalehls/Transforms/Passes.td +++ b/include/scalehls/Transforms/Passes.td @@ -63,6 +63,12 @@ def MultipleLevelDSE : Pass<"dse", "ModuleOp"> { // Graph Optimization Passes //===----------------------------------------------------------------------===// +def FakeQuantize : Pass<"fake-quantize", "ModuleOp"> { + let summary = "Convert to 8-bits quantized model (only for testing use)"; + + let constructor = "mlir::scalehls::createFakeQuantizePass()"; +} + def CreateRuntimeMain : Pass<"create-runtime-main", "ModuleOp"> { let summary = "Create the main function of runtime"; let description = [{ diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt index 5bf706b..689256e 100644 --- a/lib/Transforms/CMakeLists.txt +++ b/lib/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ add_mlir_library(MLIRScaleHLSTransforms Directive/LoopPipelining.cpp Graph/ConvertCopyToAffineLoops.cpp Graph/CreateRuntimeMain.cpp + Graph/FakeQuantize.cpp Graph/LegalizeDataflow.cpp Graph/SimplifyTosaGraph.cpp Graph/SplitFunction.cpp diff --git a/lib/Transforms/Graph/FakeQuantize.cpp b/lib/Transforms/Graph/FakeQuantize.cpp new file mode 100644 index 0000000..76738f1 --- /dev/null +++ b/lib/Transforms/Graph/FakeQuantize.cpp @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Copyright 2020-2021 The ScaleHLS Authors. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tosa/IR/TosaOps.h" +#include "scalehls/Transforms/Passes.h" +#include "scalehls/Transforms/Utils.h" + +using namespace mlir; +using namespace scalehls; + +static Type getQuantizeType(Type type) { + auto i8Type = IntegerType::get(type.getContext(), 8); + if (type.isa()) + return i8Type; + + if (auto tensorType = type.dyn_cast()) + if (tensorType.getElementType().isa()) + return RankedTensorType::get(tensorType.getShape(), i8Type); + + return nullptr; +} + +namespace { +/// This pass is only for testing use!!! To really support quantized models, +/// first we need to have front-ends, such as Torch-MLIR, to support the model +/// quantization, which has not come true yet, unfortunately. +struct FakeQuantize : public FakeQuantizeBase { + void runOnOperation() override { + auto module = getOperation(); + auto builder = OpBuilder(module); + + // Convert the type of block arguments. + module.walk([&](Block *block) { + for (auto arg : block->getArguments()) + if (auto quantType = getQuantizeType(arg.getType())) + arg.setType(quantType); + }); + + // Convert the type of operation results. Also, handle function, constant, + // conv2d, and matmul operations. + int8_t fakeIdx = 0; + module.walk([&](Operation *op) { + for (auto result : op->getResults()) + if (auto quantType = getQuantizeType(result.getType())) { + result.setType(quantType); + + if (auto constant = dyn_cast(op)) { + // Because we are not trying to really quantize the model, here we + // just assign a fake value to the constant operation. 
+ SmallVector list(constant.value().size(), fakeIdx++); + // for (auto value : constant.valueAttr().getValues()) + // list.push_back(value); + + auto quantValue = DenseIntElementsAttr::get(quantType, list); + constant->setAttr(constant.valueAttrName(), quantValue); + } + + if (auto conv2d = dyn_cast(op)) { + auto quantInfoAttr = tosa::ConvOpQuantizationAttr::get( + builder.getI32IntegerAttr(0), builder.getI32IntegerAttr(0), + conv2d.getContext()); + conv2d->setAttr(conv2d.quantization_infoAttrName(), quantInfoAttr); + } + } + + // As we have updated the type of all values in the function, we can + // safely convert the function type as well. + if (auto func = dyn_cast(op)) + func.setType(FunctionType::get( + func.getContext(), func.front().getArgumentTypes(), + func.back().getTerminator()->getOperandTypes())); + }); + } +}; +} // namespace + +std::unique_ptr scalehls::createFakeQuantizePass() { + return std::make_unique(); +} diff --git a/lib/Transforms/Loop/AffineLoopOrderOpt.cpp b/lib/Transforms/Loop/AffineLoopOrderOpt.cpp index 7c6c92a..79ca3b7 100644 --- a/lib/Transforms/Loop/AffineLoopOrderOpt.cpp +++ b/lib/Transforms/Loop/AffineLoopOrderOpt.cpp @@ -25,6 +25,7 @@ bool scalehls::applyAffineLoopOrderOpt(AffineLoopBand &band, ArrayRef permMap, bool reverse) { LLVM_DEBUG(llvm::dbgs() << "Loop order opt ";); + assert(!band.empty() && "no loops provided"); if (!isPerfectlyNested(band)) return false; diff --git a/lib/Transforms/Loop/AffineLoopPerfection.cpp b/lib/Transforms/Loop/AffineLoopPerfection.cpp index 5bb1ff3..ba42cd4 100644 --- a/lib/Transforms/Loop/AffineLoopPerfection.cpp +++ b/lib/Transforms/Loop/AffineLoopPerfection.cpp @@ -16,6 +16,8 @@ using namespace scalehls; /// Apply loop perfection. Try to sink all operations between loop statements /// into the innermost loop of the input loop band. 
bool scalehls::applyAffineLoopPerfection(AffineLoopBand &band) { + assert(!band.empty() && "no loops provided"); + auto innermostLoop = band.back(); auto builder = OpBuilder(innermostLoop); diff --git a/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp b/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp index b99165c..fc41ad7 100644 --- a/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp +++ b/lib/Transforms/Loop/AffineLoopUnrollAndPipeline.cpp @@ -83,34 +83,42 @@ static void simplifyAffineStructures(Block &block) { /// innermost tile-space loop. Optional scalehls::applyLoopTiling(AffineLoopBand &band, TileList tileList, bool simplify) { + assert(!band.empty() && "no loops provided"); if (!isPerfectlyNested(band)) return Optional(); // Loop tiling. - auto bandSize = band.size(); + auto originalBandSize = band.size(); AffineLoopBand tiledBand; if (failed(tilePerfectlyNested(band, tileList, &tiledBand))) return Optional(); + // Simplify the tiled loop band if required. if (simplify) { band.clear(); unsigned simplifiedBandSize = 0; for (unsigned i = 0, e = tiledBand.size(); i < e; ++i) { auto loop = tiledBand[i]; - (void)normalizeAffineFor(loop); + + Optional tripCount = getConstantTripCount(loop); + if (i < originalBandSize - 1 || simplifiedBandSize > 0 || !tripCount || + tripCount.getValue() != 1) + (void)normalizeAffineFor(loop); + if (loop && !loop.getLoopBody().empty()) { band.push_back(loop); - if (i < bandSize) + if (i < originalBandSize) ++simplifiedBandSize; } } simplifyAffineStructures(*band.front().getBody()); return simplifiedBandSize - 1; - } else { - band = tiledBand; - return bandSize - 1; } + + // Otherwise, directly return the tiled loop band. + band = tiledBand; + return originalBandSize - 1; } namespace { @@ -151,13 +159,14 @@ struct AffineLoopUnrollAndPipeline sizes.push_back(1); } - auto tileLoc = applyLoopTiling(band, sizes).getValue(); - band.resize(tileLoc + 1); + // Apply loop tiling and extract the tile loops if applicable. 
+ if (auto tileLoc = applyLoopTiling(band, sizes)) + band.resize(tileLoc.getValue() + 1); - // TODO: canonicalize here to eliminate affine.apply ops? + // Apply loop order optimization and pipelining. if (loopOrderOpt) applyAffineLoopOrderOpt(band); - applyLoopPipelining(band, tileLoc, (unsigned)1); + applyLoopPipelining(band, band.size() - 1, (unsigned)1); } } }; diff --git a/lib/Transforms/Loop/RemoveVariableBound.cpp b/lib/Transforms/Loop/RemoveVariableBound.cpp index 51945ec..f7c8593 100644 --- a/lib/Transforms/Loop/RemoveVariableBound.cpp +++ b/lib/Transforms/Loop/RemoveVariableBound.cpp @@ -14,6 +14,8 @@ using namespace scalehls; /// Apply remove variable bound to all inner loops of the input loop. bool scalehls::applyRemoveVariableBound(AffineLoopBand &band) { + assert(!band.empty() && "no loops provided"); + auto innermostLoop = band.back(); auto builder = OpBuilder(innermostLoop); diff --git a/lib/Transforms/Passes.cpp b/lib/Transforms/Passes.cpp index 54feef8..6be12de 100644 --- a/lib/Transforms/Passes.cpp +++ b/lib/Transforms/Passes.cpp @@ -42,6 +42,9 @@ void scalehls::registerScaleHLSPyTorchPipeline() { if (opts.vectorSize.hasValue()) vectorSize = opts.vectorSize; + if (opts.fakeQuantize) + pm.addPass(scalehls::createFakeQuantizePass()); + // Graph-level optimizations. pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(scalehls::createSimplifyTosaGraphPass()); @@ -63,12 +66,14 @@ void scalehls::registerScaleHLSPyTorchPipeline() { pm.addPass(scalehls::createConvertCopyToAffineLoopsPass()); // Loop-level optimizations. 
- if (vectorSize) - pm.addPass(mlir::createSuperVectorizePass({vectorSize})); pm.addPass(memref::createFoldSubViewOpsPass()); pm.addPass(mlir::createAffineLoopNormalizePass()); pm.addPass(mlir::createSimplifyAffineStructuresPass()); pm.addPass(mlir::createCanonicalizerPass()); + if (vectorSize) { + pm.addPass(mlir::createSuperVectorizePass({vectorSize})); + pm.addPass(mlir::createCanonicalizerPass()); + } pm.addPass(scalehls::createLegalizeToHLSCppPass(opts)); pm.addPass(scalehls::createMaterializeReductionPass()); if (loopUnrollSize) {