[BenchmarkGen] support generating CNN benchmarks without considering the last dense layer (#15)

2020-12-01 18:31:23 -06:00 · 2020-12-01 18:31:23 -06:00 · e5cd6673e7
parent ea7b6a1495
commit e5cd6673e7
5 changed files with 184 additions and 35 deletions
--- a/config/cnn-config.ini
+++ b/config/cnn-config.ini
@ -1,6 +1,13 @@
 [config]
+# Input and output information
+inputChannel=3
 inputHeight=224
 inputWidth=224
-inputChannel=3
 outputChannel=1000
+
+# Generation rules
+batchSize=1
+minChannel=64
 maxChannel=512
+poolingNumber=5
+bypassNumber=0
--- a/include/Dialect/HLSKernel/CNNOps.td
+++ b/include/Dialect/HLSKernel/CNNOps.td
@ -36,10 +36,8 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {

    Strides and padding are integer attributes of the same rank as the number of
    window dimensions. The padding attribute specifies the amount of zero
-    padding to be applied to the base area, which is a n-d array of (low, high)
-    padding. Each pair has the low padding as the first element and the high
-    padding as the second element. Using padding is equivalent to inserting
-    those same zero values into the input before the convolution.
+    padding to be applied to the base area, which is a 4-element array of (top,
+    bottom, left, right) padding.
  }];

  let arguments = (ins
@ -48,7 +46,7 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {
    AnyMemRef:$B,
    AnyMemRef:$O,
    OptionalAttr<I64ArrayAttr>:$strides,
-    OptionalAttr<I64ElementsAttr>:$padding
+    OptionalAttr<I64ArrayAttr>:$padding
  );
 }

@ -67,7 +65,7 @@ def MaxPoolOp : HLSKernelOp<"maxpool", [HLSKernelOpInterface]> {
    AnyMemRef:$O,
    OptionalAttr<I64ArrayAttr>:$kernel_shape,
    OptionalAttr<I64ArrayAttr>:$strides,
-    OptionalAttr<I64ElementsAttr>:$padding
+    OptionalAttr<I64ArrayAttr>:$padding
  );
 }

--- a/lib/Conversion/HLSKernelToAffine/HLSKernelToAffine.cpp
+++ b/lib/Conversion/HLSKernelToAffine/HLSKernelToAffine.cpp
@ -45,15 +45,15 @@ private:
  OpBuilder &builder;
  Location loc;

-  // Helpers for creating loops.
-  // Constant upper and lower bound.
+  /// Helpers for creating loops.
+  /// Constant upper and lower bound.
  Value createLoop(int64_t lower, int64_t upper, int64_t step = 1) {
    auto loop = builder.create<mlir::AffineForOp>(loc, lower, upper, step);
    builder.setInsertionPointToStart(&loop.getLoopBody().front());
    return loop.getInductionVar();
  }

-  // General case.
+  /// General case loop boundary.
  Value createLoop(std::initializer_list<Value> lower, AffineMap lowerMap,
                   std::initializer_list<Value> upper, AffineMap upperMap,
                   int64_t step = 1) {
@ -80,7 +80,7 @@ private:
    return createLoop({}, lowerMap, {upper}, upperMap);
  }

-  // Helpers for creating constant, loads, stores and binary operations.
+  /// Helpers for creating constant, loads, stores and binary operations.
  Value createConst(int64_t val, Type valType) {
    if (valType.isa<IntegerType>())
      return builder.create<mlir::ConstantOp>(
@ -170,8 +170,8 @@ bool HLSKernelVisitor::visitOp(DenseOp op) {
 /// Padding and strides has not been suppored.
 bool HLSKernelVisitor::visitOp(ConvOp op) {
  SmallVector<int64_t, 4> padding;
-  for (auto pad : op.getAttrOfType<DenseIntElementsAttr>("padding"))
-    padding.push_back(pad.getSExtValue());
+  for (auto pad : op.getAttrOfType<ArrayAttr>("padding"))
+    padding.push_back(pad.cast<IntegerAttr>().getInt());

  auto I = op.getOperand(0);
  auto K = op.getOperand(1);
@ -257,8 +257,8 @@ bool HLSKernelVisitor::visitOp(ConvOp op) {
  return true;
 }

-// Padding and strides has not been suppored. Only support when kernel size is
-// equal to stride size.
+/// Padding and strides are not supported yet. Only the case where the kernel
+/// size is equal to the stride size is supported.
 bool HLSKernelVisitor::visitOp(MaxPoolOp op) {
  SmallVector<int64_t, 2> kernelShape;
  for (auto shape : op.getAttrOfType<ArrayAttr>("kernel_shape"))
@ -394,7 +394,7 @@ bool HLSKernelVisitor::visitOp(MergeOp op) {
 // BLASOps Handler
 //===----------------------------------------------------------------------===//

-// Only default attributes configuration are supported.
+/// Only the default attribute configuration is supported.
 bool HLSKernelVisitor::visitOp(GemmOp op) {
  auto alpha = op.getOperand(0);
  auto beta = op.getOperand(1);
--- a/test/Conversion/HLSKernelToAffine/test_conv.mlir
+++ b/test/Conversion/HLSKernelToAffine/test_conv.mlir
@ -2,6 +2,6 @@

 // CHECK: module {
 func @test_conv(%I: memref<10x3x32x32xf32>, %K: memref<16x3x5x5xf32>, %B: memref<16xf32>, %O: memref<10x16x32x32xf32>) -> () {
-  "hlskernel.conv" (%I, %K, %B, %O) {padding=dense<[[2, 2], [2, 2]]>: tensor<2x2xi64>} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
+  "hlskernel.conv" (%I, %K, %B, %O) {padding=[2, 2, 2, 2]} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
  return
 }
--- a/tools/benchmark-gen/benchmark-gen.cpp
+++ b/tools/benchmark-gen/benchmark-gen.cpp
@ -26,7 +26,7 @@ using namespace hlskernel;

 static llvm::cl::opt<std::string>
    benchmarkType("type", llvm::cl::desc("Benchmark type"),
-                  llvm::cl::value_desc("cnn/image"), llvm::cl::init("cnn"));
+                  llvm::cl::value_desc("cnn/blas/isp"), llvm::cl::init("cnn"));

 static llvm::cl::opt<std::string>
    configFilename("config", llvm::cl::desc("Configuration filename"),
@ -41,31 +41,174 @@ static llvm::cl::opt<std::string>
    outputFilename("o", llvm::cl::desc("Output filename"),
                   llvm::cl::value_desc("filename"), llvm::cl::init("-"));

-static LogicalResult benchmarkGen(raw_ostream &os) {
-  MLIRContext context;
-  context.loadDialect<HLSKernelDialect>();
-  auto module = ModuleOp::create(UnknownLoc::get(&context));
+namespace {
+/// Class for automatically generating benchmarks. It keeps references to the
+/// output stream and to the module that the generated function is built into.
+class BenchmarkGenerator {
+public:
+  explicit BenchmarkGenerator(raw_ostream &os, ModuleOp &module)
+      : os(os), module(module) {}
+
+  /// Stream that genCNN prints the generated module to.
+  raw_ostream &os;
+  /// Module whose body region genCNN creates the benchmark function in.
+  ModuleOp &module;
+
+  /// Methods for generating various types of benchmarks. genBLAS and genISP
+  /// are placeholders that always return failure().
+  LogicalResult genCNN(INIReader config);
+  LogicalResult genBLAS(INIReader config) { return failure(); }
+  LogicalResult genISP(INIReader config) { return failure(); }
+}; // class BenchmarkGenerator
+} // namespace
+
+/// Generate a randomly structured CNN into a new function "auto_gen_cnn" of the
+/// target module and print the module to the output stream. Each generated
+/// layer is a convolution followed by a ReLU and, optionally, a 2x2 max pooling.
+/// Bypass is not supported yet and the last dense layer is not generated yet.
+/// Always returns success().
+LogicalResult BenchmarkGenerator::genCNN(INIReader config) {
+  // Parse configuration file.
+  // NOTE(review): a parse error is only reported here; generation continues,
+  // presumably with the default values passed to GetInteger below - confirm
+  // that this best-effort behavior is intended.
+  if (config.ParseError())
+    llvm::outs() << "error: cnn configuration file parse fail\n";
+
+  const auto inputChannel = config.GetInteger("config", "inputChannel", 3);
+  const auto inputHeight = config.GetInteger("config", "inputHeight", 224);
+  const auto inputWidth = config.GetInteger("config", "inputWidth", 224);
+  const auto outputChannel = config.GetInteger("config", "outputChannel", 1000);
+
+  const auto batchSize = config.GetInteger("config", "batchSize", 1);
+  const auto minChannel = config.GetInteger("config", "minChannel", 64);
+  const auto maxChannel = config.GetInteger("config", "maxChannel", 512);
+  const auto poolingNumber = config.GetInteger("config", "poolingNumber", 5);
+  // const auto bypassNumber = config.GetInteger("config", "bypassNumber", 0);
+
+  // Create a new builder in the target module.
  OpBuilder builder(module.getBodyRegion());
+  auto loc = module.getLoc();
+  // Seed the C random generator with the current time, so every run produces a
+  // different network structure.
+  std::srand(std::time(nullptr));

+  // Helpers.
+  // Build an f32 memref type of the given shape.
+  auto getMemType = [&](std::initializer_list<int64_t> shape) {
+    return MemRefType::get(shape, builder.getF32Type());
+  };
+
+  // Randomly pick a kernel size out of 3, 5 and 7.
+  auto getKernelShape = [&]() { return std::rand() % 3 * 2 + 3; };
+
+  // Roughly one time out of four, double the channel number as long as the
+  // current one is still below maxChannel; otherwise keep it unchanged.
+  auto getChannel = [&](int current) {
+    if (std::rand() % 4 == 0 && current < maxChannel)
+      return current * 2;
+    else
+      return current;
+  };
+
+  // Decide whether the next layer ends with a pooling. `current` is the number
+  // of poolings created so far: pooling is forced while it is zero, chosen
+  // roughly one time out of four afterwards, and never exceeds poolingNumber.
+  auto getPoolingFlag = [&](int current) {
+    if ((std::rand() % 4 == 0 || current == 0) && current < poolingNumber)
+      return true;
+    else
+      return false;
+  };
+
+  // Generate function signature and create a new function.
+  // NOTE(review): the second argument (shape {outputChannel}) is declared but
+  // not used in this function - presumably reserved for the dense layer output.
+  SmallVector<mlir::Type, 2> inputTypes;
+  inputTypes.push_back(
+      getMemType({batchSize, inputChannel, inputHeight, inputWidth}));
+  inputTypes.push_back(getMemType({outputChannel}));
+  SmallVector<mlir::Type, 2> outputTypes;
+
+  auto func = builder.create<FuncOp>(
+      loc, "auto_gen_cnn", builder.getFunctionType(inputTypes, outputTypes));
+  func.addEntryBlock();
+  builder.setInsertionPointToStart(&func.front());
+
+  // Initialize status registers.
+  int poolingCount = 0;
+  bool poolingFlag = getPoolingFlag(poolingCount);
+  int kernelShape = getKernelShape();
+  // Half of the (odd) kernel size; the convolution output below is allocated
+  // with the same height and width as its input.
+  int padding = (kernelShape - 1) / 2;
+
+  // "top" describes the input feature map of the layer being generated.
+  int topChannel = inputChannel;
+  int topHeight = inputHeight;
+  int topWidth = inputWidth;
+
+  // "btm" describes the output feature map of the layer being generated; its
+  // height and width are halved when the layer ends with a pooling.
+  int btmChannel = minChannel;
+  int btmHeight = poolingFlag ? topHeight / 2 : topHeight;
+  int btmWidth = poolingFlag ? topWidth / 2 : topWidth;
+
+  // Memory references.
+  SmallVector<mlir::Value, 32> fmaps;
+  SmallVector<mlir::Value, 32> kernels;
+  SmallVector<mlir::Value, 32> biases;
+  fmaps.push_back(func.getArgument(0));
+
+  // Generate CNN model. Layers are appended until poolingNumber poolings have
+  // been created and the channel number has reached maxChannel.
+  // NOTE(review): if minChannel <= 0 the channel number can never grow, so this
+  // loop looks like it would not terminate - confirm the config is validated.
+  while (poolingCount < poolingNumber || btmChannel < maxChannel) {
+    // Create convolutional layer.
+    fmaps.push_back(builder.create<mlir::AllocOp>(
+        loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
+    kernels.push_back(builder.create<mlir::AllocOp>(
+        loc, getMemType({btmChannel, topChannel, kernelShape, kernelShape})));
+    biases.push_back(
+        builder.create<mlir::AllocOp>(loc, getMemType({btmChannel})));
+
+    // The second to last feature map is the input, the last one the output.
+    builder.create<ConvOp>(
+        loc, *std::prev(fmaps.end(), 2), kernels.back(), biases.back(),
+        fmaps.back(), builder.getI64ArrayAttr({1, 1}),
+        builder.getI64ArrayAttr({padding, padding, padding, padding}));
+
+    // Create ReLU layer.
+    fmaps.push_back(builder.create<mlir::AllocOp>(
+        loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
+    builder.create<ReluOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back());
+
+    // Create max pooling layer if applied.
+    if (poolingFlag) {
+      fmaps.push_back(builder.create<mlir::AllocOp>(
+          loc, getMemType({batchSize, btmChannel, btmHeight, btmWidth})));
+      builder.create<MaxPoolOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back(),
+                                builder.getI64ArrayAttr({2, 2}),
+                                builder.getI64ArrayAttr({2, 2}),
+                                builder.getI64ArrayAttr({0, 0, 0, 0}));
+    }
+
+    // Update status registers.
+    poolingCount = poolingFlag ? poolingCount + 1 : poolingCount;
+    poolingFlag = getPoolingFlag(poolingCount);
+    kernelShape = getKernelShape();
+    padding = (kernelShape - 1) / 2;
+
+    // The output of this layer becomes the input of the next one.
+    topChannel = btmChannel;
+    topHeight = btmHeight;
+    topWidth = btmWidth;
+
+    btmChannel = getChannel(topChannel);
+    btmHeight = poolingFlag ? topHeight / 2 : topHeight;
+    btmWidth = poolingFlag ? topWidth / 2 : topWidth;
+  }
+
+  // TODO: Create the last dense layer.
+
+  builder.create<mlir::ReturnOp>(loc);
+
+  os << module << "\n";
+  return success();
+}
+
+/// Create a fresh MLIR context and module, then dispatch to the generator that
+/// matches the "type" command line option. Returns the result of the selected
+/// generator, or failure() when the benchmark type is unknown.
+static LogicalResult processBenchmarkGen(raw_ostream &os) {
+  // Create a new MLIR context and module.
+  MLIRContext context;
+  context.loadDialect<StandardOpsDialect, HLSKernelDialect>();
+  auto module = ModuleOp::create(UnknownLoc::get(&context));
+  BenchmarkGenerator generator(os, module);
+
+  // Generate corresponding benchmark.
  if (benchmarkType == "cnn") {
-    INIReader cnnConfig(configFilename);
-    if (cnnConfig.ParseError())
-      llvm::outs() << "error: cnn configuration file parse fail\n";
+    INIReader config(configFilename);
+    return generator.genCNN(config);

-    auto inputHeight = cnnConfig.GetInteger("config", "inputHeight", 224);
-    llvm::outs() << inputHeight << "\n";
+  } else if (benchmarkType == "blas") {
+    INIReader config(configFilename);
+    return generator.genBLAS(config);

-    SmallVector<mlir::Type, 4> types;
-    builder.create<FuncOp>(module.getLoc(), "new_func",
-                           builder.getFunctionType(types, types));
+  } else if (benchmarkType == "isp") {
+    INIReader config(configFilename);
+    // Dispatch to the ISP generator (not the BLAS one).
+    return generator.genISP(config);

-    module.print(os);
-    os << "\n\n";
-  } else if (benchmarkType == "image") {
  } else {
    return failure();
  }
-  return success();
 }

 int main(int argc, char **argv) {
@ -87,7 +230,8 @@ int main(int argc, char **argv) {
    exit(1);
  }

-  if (failed(benchmarkGen(output->os()))) {
+  // Process benchmark generation.
+  if (failed(processBenchmarkGen(output->os()))) {
    return 1;
  }