[BenchmarkGen] support generate CNN benchmarks without considering the last dense layer (#15)

This commit is contained in:
Hanchen Ye 2020-12-01 18:31:23 -06:00
parent ea7b6a1495
commit e5cd6673e7
5 changed files with 184 additions and 35 deletions

View File

@ -1,6 +1,13 @@
[config]
# input and output information
inputChannel=3
inputHeight=224
inputWidth=224
inputChannel=3
outputChannel=1000
# Generation rules
batchSize=1
minChannel=64
maxChannel=512
poolingNumber=5
bypassNumber=0

View File

@ -36,10 +36,8 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {
Strides and padding are integer attributes of the same rank as the number of
window dimensions. The padding attribute specifies the amount of zero
padding to be applied to the base area, which is a n-d array of (low, high)
padding. Each pair has the low padding as the first element and the high
padding as the second element. Using padding is equivalent to inserting
those same zero values into the input before the convolution.
padding to be applied to the base area, which is a 4-element array of (top,
bottom, left, right) padding.
}];
let arguments = (ins
@ -48,7 +46,7 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {
AnyMemRef:$B,
AnyMemRef:$O,
OptionalAttr<I64ArrayAttr>:$strides,
OptionalAttr<I64ElementsAttr>:$padding
OptionalAttr<I64ArrayAttr>:$padding
);
}
@ -67,7 +65,7 @@ def MaxPoolOp : HLSKernelOp<"maxpool", [HLSKernelOpInterface]> {
AnyMemRef:$O,
OptionalAttr<I64ArrayAttr>:$kernel_shape,
OptionalAttr<I64ArrayAttr>:$strides,
OptionalAttr<I64ElementsAttr>:$padding
OptionalAttr<I64ArrayAttr>:$padding
);
}

View File

@ -45,15 +45,15 @@ private:
OpBuilder &builder;
Location loc;
// Helpers for creating loops.
// Constant upper and lower bound.
/// Helpers for creating loops.
/// Constant upper and lower bound.
Value createLoop(int64_t lower, int64_t upper, int64_t step = 1) {
auto loop = builder.create<mlir::AffineForOp>(loc, lower, upper, step);
builder.setInsertionPointToStart(&loop.getLoopBody().front());
return loop.getInductionVar();
}
// General case.
/// General case loop boundary.
Value createLoop(std::initializer_list<Value> lower, AffineMap lowerMap,
std::initializer_list<Value> upper, AffineMap upperMap,
int64_t step = 1) {
@ -80,7 +80,7 @@ private:
return createLoop({}, lowerMap, {upper}, upperMap);
}
// Helpers for creating constant, loads, stores and binary operations.
/// Helpers for creating constant, loads, stores and binary operations.
Value createConst(int64_t val, Type valType) {
if (valType.isa<IntegerType>())
return builder.create<mlir::ConstantOp>(
@ -170,8 +170,8 @@ bool HLSKernelVisitor::visitOp(DenseOp op) {
/// Padding and strides have not been supported yet.
bool HLSKernelVisitor::visitOp(ConvOp op) {
SmallVector<int64_t, 4> padding;
for (auto pad : op.getAttrOfType<DenseIntElementsAttr>("padding"))
padding.push_back(pad.getSExtValue());
for (auto pad : op.getAttrOfType<ArrayAttr>("padding"))
padding.push_back(pad.cast<IntegerAttr>().getInt());
auto I = op.getOperand(0);
auto K = op.getOperand(1);
@ -257,8 +257,8 @@ bool HLSKernelVisitor::visitOp(ConvOp op) {
return true;
}
// Padding and strides has not been suppored. Only support when kernel size is
// equal to stride size.
/// Padding and strides have not been supported yet. Only supported when the
/// kernel size is equal to the stride size.
bool HLSKernelVisitor::visitOp(MaxPoolOp op) {
SmallVector<int64_t, 2> kernelShape;
for (auto shape : op.getAttrOfType<ArrayAttr>("kernel_shape"))
@ -394,7 +394,7 @@ bool HLSKernelVisitor::visitOp(MergeOp op) {
// BLASOps Handler
//===----------------------------------------------------------------------===//
// Only default attributes configuration are supported.
/// Only the default attribute configuration is supported.
bool HLSKernelVisitor::visitOp(GemmOp op) {
auto alpha = op.getOperand(0);
auto beta = op.getOperand(1);

View File

@ -2,6 +2,6 @@
// CHECK: module {
func @test_conv(%I: memref<10x3x32x32xf32>, %K: memref<16x3x5x5xf32>, %B: memref<16xf32>, %O: memref<10x16x32x32xf32>) -> () {
"hlskernel.conv" (%I, %K, %B, %O) {padding=dense<[[2, 2], [2, 2]]>: tensor<2x2xi64>} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
"hlskernel.conv" (%I, %K, %B, %O) {padding=[2, 2, 2, 2]} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
return
}

View File

@ -26,7 +26,7 @@ using namespace hlskernel;
static llvm::cl::opt<std::string>
benchmarkType("type", llvm::cl::desc("Benchmark type"),
llvm::cl::value_desc("cnn/image"), llvm::cl::init("cnn"));
llvm::cl::value_desc("cnn/blas/isp"), llvm::cl::init("cnn"));
static llvm::cl::opt<std::string>
configFilename("config", llvm::cl::desc("Configuration filename"),
@ -41,31 +41,174 @@ static llvm::cl::opt<std::string>
outputFilename("o", llvm::cl::desc("Output filename"),
llvm::cl::value_desc("filename"), llvm::cl::init("-"));
static LogicalResult benchmarkGen(raw_ostream &os) {
MLIRContext context;
context.loadDialect<HLSKernelDialect>();
auto module = ModuleOp::create(UnknownLoc::get(&context));
namespace {
/// Class for automatically generating benchmarks.
class BenchmarkGenerator {
public:
  explicit BenchmarkGenerator(raw_ostream &os, ModuleOp &module)
      : os(os), module(module) {}

  /// Stream that the generated module is printed to.
  raw_ostream &os;
  /// Module that generated benchmark functions are inserted into.
  /// NOTE(review): ModuleOp has value semantics in MLIR; holding it by
  /// reference ties this generator to the caller's local — confirm intended.
  ModuleOp &module;

  /// Methods for generating various types of benchmarks. BLAS and ISP
  /// generation are not implemented yet and always return failure.
  LogicalResult genCNN(INIReader config);
  LogicalResult genBLAS(INIReader config) { return failure(); }
  LogicalResult genISP(INIReader config) { return failure(); }
}; // class BenchmarkGenerator
} // namespace
/// Generate a randomized CNN benchmark: a chain of convolution + ReLU layers
/// with occasional max-pooling layers, driven by the [config] section of the
/// INI file. Currently bypasses (skip connections) are not supported, and the
/// last dense layer is not generated yet.
///
/// Returns failure() when the configuration file cannot be parsed; otherwise
/// prints the generated module to `os` and returns success().
LogicalResult BenchmarkGenerator::genCNN(INIReader config) {
  // Parse configuration file. Abort instead of silently generating a
  // benchmark from the built-in default values.
  if (config.ParseError()) {
    llvm::outs() << "error: cnn configuration file parse fail\n";
    return failure();
  }

  // Input and output information.
  const auto inputChannel = config.GetInteger("config", "inputChannel", 3);
  const auto inputHeight = config.GetInteger("config", "inputHeight", 224);
  const auto inputWidth = config.GetInteger("config", "inputWidth", 224);
  const auto outputChannel = config.GetInteger("config", "outputChannel", 1000);

  // Generation rules.
  const auto batchSize = config.GetInteger("config", "batchSize", 1);
  const auto minChannel = config.GetInteger("config", "minChannel", 64);
  const auto maxChannel = config.GetInteger("config", "maxChannel", 512);
  const auto poolingNumber = config.GetInteger("config", "poolingNumber", 5);
  // const auto bypassNumber = config.GetInteger("config", "bypassNumber", 0);

  // Create a new builder in the target module.
  OpBuilder builder(module.getBodyRegion());
  auto loc = module.getLoc();
  std::srand(std::time(nullptr));

  // Helpers.
  // All feature maps, kernels, and biases are f32 memrefs.
  auto getMemType = [&](std::initializer_list<int64_t> shape) {
    return MemRefType::get(shape, builder.getF32Type());
  };
  // Randomly pick a square kernel size of 3, 5, or 7.
  auto getKernelShape = [&]() { return std::rand() % 3 * 2 + 3; };
  // With 1/4 probability, double the channel number until maxChannel.
  auto getChannel = [&](int current) {
    if (std::rand() % 4 == 0 && current < maxChannel)
      return current * 2;
    else
      return current;
  };
  // With 1/4 probability (always before the first pooling), insert a pooling
  // layer, until poolingNumber pooling layers have been created.
  auto getPoolingFlag = [&](int current) {
    if ((std::rand() % 4 == 0 || current == 0) && current < poolingNumber)
      return true;
    else
      return false;
  };

  // Generate function signature and create a new function. The function takes
  // the input feature map and the final output buffer as arguments.
  SmallVector<mlir::Type, 2> inputTypes;
  inputTypes.push_back(
      getMemType({batchSize, inputChannel, inputHeight, inputWidth}));
  inputTypes.push_back(getMemType({outputChannel}));
  SmallVector<mlir::Type, 2> outputTypes;

  auto func = builder.create<FuncOp>(
      loc, "auto_gen_cnn", builder.getFunctionType(inputTypes, outputTypes));
  func.addEntryBlock();
  builder.setInsertionPointToStart(&func.front());

  // Initialize status registers. "top" describes the layer's input fmap,
  // "btm" the layer's output fmap; pooling halves the spatial size.
  int poolingCount = 0;
  bool poolingFlag = getPoolingFlag(poolingCount);
  int kernelShape = getKernelShape();
  int padding = (kernelShape - 1) / 2; // "same" padding for odd kernels
  int topChannel = inputChannel;
  int topHeight = inputHeight;
  int topWidth = inputWidth;
  int btmChannel = minChannel;
  int btmHeight = poolingFlag ? topHeight / 2 : topHeight;
  int btmWidth = poolingFlag ? topWidth / 2 : topWidth;

  // Memory references of all intermediate feature maps, kernels, and biases.
  SmallVector<mlir::Value, 32> fmaps;
  SmallVector<mlir::Value, 32> kernels;
  SmallVector<mlir::Value, 32> biases;
  fmaps.push_back(func.getArgument(0));

  // Generate CNN model until both the channel count and the number of pooling
  // layers reach their configured limits.
  while (poolingCount < poolingNumber || btmChannel < maxChannel) {
    // Create convolutional layer (stride 1, symmetric "same" padding).
    fmaps.push_back(builder.create<mlir::AllocOp>(
        loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
    kernels.push_back(builder.create<mlir::AllocOp>(
        loc, getMemType({btmChannel, topChannel, kernelShape, kernelShape})));
    biases.push_back(
        builder.create<mlir::AllocOp>(loc, getMemType({btmChannel})));

    builder.create<ConvOp>(
        loc, *std::prev(fmaps.end(), 2), kernels.back(), biases.back(),
        fmaps.back(), builder.getI64ArrayAttr({1, 1}),
        builder.getI64ArrayAttr({padding, padding, padding, padding}));

    // Create ReLU layer.
    fmaps.push_back(builder.create<mlir::AllocOp>(
        loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
    builder.create<ReluOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back());

    // Create max pooling layer if applied (2x2 kernel, stride 2, no padding).
    if (poolingFlag) {
      fmaps.push_back(builder.create<mlir::AllocOp>(
          loc, getMemType({batchSize, btmChannel, btmHeight, btmWidth})));

      builder.create<MaxPoolOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back(),
                                builder.getI64ArrayAttr({2, 2}),
                                builder.getI64ArrayAttr({2, 2}),
                                builder.getI64ArrayAttr({0, 0, 0, 0}));
    }

    // Update status registers for the next layer.
    poolingCount = poolingFlag ? poolingCount + 1 : poolingCount;
    poolingFlag = getPoolingFlag(poolingCount);
    kernelShape = getKernelShape();
    padding = (kernelShape - 1) / 2;
    topChannel = btmChannel;
    topHeight = btmHeight;
    topWidth = btmWidth;
    btmChannel = getChannel(topChannel);
    btmHeight = poolingFlag ? topHeight / 2 : topHeight;
    btmWidth = poolingFlag ? topWidth / 2 : topWidth;
  }

  // TODO: Create the last dense layer.
  builder.create<mlir::ReturnOp>(loc);
  os << module << "\n";
  return success();
}
static LogicalResult processBenchmarkGen(raw_ostream &os) {
// Create a new MLIR context and module.
MLIRContext context;
context.loadDialect<StandardOpsDialect, HLSKernelDialect>();
auto module = ModuleOp::create(UnknownLoc::get(&context));
BenchmarkGenerator generator(os, module);
// Generate corresponding benchmark.
if (benchmarkType == "cnn") {
INIReader cnnConfig(configFilename);
if (cnnConfig.ParseError())
llvm::outs() << "error: cnn configuration file parse fail\n";
INIReader config(configFilename);
return generator.genCNN(config);
auto inputHeight = cnnConfig.GetInteger("config", "inputHeight", 224);
llvm::outs() << inputHeight << "\n";
} else if (benchmarkType == "blas") {
INIReader config(configFilename);
return generator.genBLAS(config);
SmallVector<mlir::Type, 4> types;
builder.create<FuncOp>(module.getLoc(), "new_func",
builder.getFunctionType(types, types));
} else if (benchmarkType == "isp") {
INIReader config(configFilename);
return generator.genBLAS(config);
module.print(os);
os << "\n\n";
} else if (benchmarkType == "image") {
} else {
return failure();
}
return success();
}
int main(int argc, char **argv) {
@ -87,7 +230,8 @@ int main(int argc, char **argv) {
exit(1);
}
if (failed(benchmarkGen(output->os()))) {
// Process benchmark generation.
if (failed(processBenchmarkGen(output->os()))) {
return 1;
}