[BenchmarkGen] Support generating CNN benchmarks without considering the last dense layer (#15)
This commit is contained in:
parent
ea7b6a1495
commit
e5cd6673e7
|
@ -1,6 +1,13 @@
|
|||
[config]
|
||||
# input and output information
|
||||
inputChannel=3
|
||||
inputHeight=224
|
||||
inputWidth=224
|
||||
inputChannel=3
|
||||
outputChannel=1000
|
||||
|
||||
# Generation rules
|
||||
batchSize=1
|
||||
minChannel=64
|
||||
maxChannel=512
|
||||
poolingNumber=5
|
||||
bypassNumber=0
|
||||
|
|
|
@ -36,10 +36,8 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {
|
|||
|
||||
Strides and padding are integer attributes of the same rank as the number of
|
||||
window dimensions. The padding attribute specifies the amount of zero
|
||||
padding to be applied to the base area, which is a n-d array of (low, high)
|
||||
padding. Each pair has the low padding as the first element and the high
|
||||
padding as the second element. Using padding is equivalent to inserting
|
||||
those same zero values into the input before the convolution.
|
||||
padding to be applied to the base area, which is a 4-d array of (top,
|
||||
bottom, left, right) padding.
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
|
@ -48,7 +46,7 @@ def ConvOp : HLSKernelOp<"conv", [HLSKernelOpInterface]> {
|
|||
AnyMemRef:$B,
|
||||
AnyMemRef:$O,
|
||||
OptionalAttr<I64ArrayAttr>:$strides,
|
||||
OptionalAttr<I64ElementsAttr>:$padding
|
||||
OptionalAttr<I64ArrayAttr>:$padding
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -67,7 +65,7 @@ def MaxPoolOp : HLSKernelOp<"maxpool", [HLSKernelOpInterface]> {
|
|||
AnyMemRef:$O,
|
||||
OptionalAttr<I64ArrayAttr>:$kernel_shape,
|
||||
OptionalAttr<I64ArrayAttr>:$strides,
|
||||
OptionalAttr<I64ElementsAttr>:$padding
|
||||
OptionalAttr<I64ArrayAttr>:$padding
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -45,15 +45,15 @@ private:
|
|||
OpBuilder &builder;
|
||||
Location loc;
|
||||
|
||||
// Helpers for creating loops.
|
||||
// Constant upper and lower bound.
|
||||
/// Helpers for creating loops.
|
||||
/// Constant upper and lower bound.
|
||||
Value createLoop(int64_t lower, int64_t upper, int64_t step = 1) {
  // Build an affine.for with constant bounds, then move the builder into the
  // loop body so that subsequently created operations are nested inside it.
  auto forOp = builder.create<mlir::AffineForOp>(loc, lower, upper, step);
  auto *bodyBlock = &forOp.getLoopBody().front();
  builder.setInsertionPointToStart(bodyBlock);

  // Callers index memory with the induction variable of the new loop.
  return forOp.getInductionVar();
}
|
||||
|
||||
// General case.
|
||||
/// General case loop boundary.
|
||||
Value createLoop(std::initializer_list<Value> lower, AffineMap lowerMap,
|
||||
std::initializer_list<Value> upper, AffineMap upperMap,
|
||||
int64_t step = 1) {
|
||||
|
@ -80,7 +80,7 @@ private:
|
|||
return createLoop({}, lowerMap, {upper}, upperMap);
|
||||
}
|
||||
|
||||
// Helpers for creating constant, loads, stores and binary operations.
|
||||
/// Helpers for creating constant, loads, stores and binary operations.
|
||||
Value createConst(int64_t val, Type valType) {
|
||||
if (valType.isa<IntegerType>())
|
||||
return builder.create<mlir::ConstantOp>(
|
||||
|
@ -170,8 +170,8 @@ bool HLSKernelVisitor::visitOp(DenseOp op) {
|
|||
/// Padding and strides are not supported yet.
|
||||
bool HLSKernelVisitor::visitOp(ConvOp op) {
|
||||
SmallVector<int64_t, 4> padding;
|
||||
for (auto pad : op.getAttrOfType<DenseIntElementsAttr>("padding"))
|
||||
padding.push_back(pad.getSExtValue());
|
||||
for (auto pad : op.getAttrOfType<ArrayAttr>("padding"))
|
||||
padding.push_back(pad.cast<IntegerAttr>().getInt());
|
||||
|
||||
auto I = op.getOperand(0);
|
||||
auto K = op.getOperand(1);
|
||||
|
@ -257,8 +257,8 @@ bool HLSKernelVisitor::visitOp(ConvOp op) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Padding and strides are not supported yet. Only works when kernel size is
|
||||
// equal to stride size.
|
||||
/// Padding and strides are not supported yet. Only works when kernel size is
|
||||
/// equal to stride size.
|
||||
bool HLSKernelVisitor::visitOp(MaxPoolOp op) {
|
||||
SmallVector<int64_t, 2> kernelShape;
|
||||
for (auto shape : op.getAttrOfType<ArrayAttr>("kernel_shape"))
|
||||
|
@ -394,7 +394,7 @@ bool HLSKernelVisitor::visitOp(MergeOp op) {
|
|||
// BLASOps Handler
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Only the default attribute configuration is supported.
|
||||
/// Only the default attribute configuration is supported.
|
||||
bool HLSKernelVisitor::visitOp(GemmOp op) {
|
||||
auto alpha = op.getOperand(0);
|
||||
auto beta = op.getOperand(1);
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
|
||||
// CHECK: module {
|
||||
func @test_conv(%I: memref<10x3x32x32xf32>, %K: memref<16x3x5x5xf32>, %B: memref<16xf32>, %O: memref<10x16x32x32xf32>) -> () {
|
||||
"hlskernel.conv" (%I, %K, %B, %O) {padding=dense<[[2, 2], [2, 2]]>: tensor<2x2xi64>} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
|
||||
"hlskernel.conv" (%I, %K, %B, %O) {padding=[2, 2, 2, 2]} : (memref<10x3x32x32xf32>, memref<16x3x5x5xf32>, memref<16xf32>, memref<10x16x32x32xf32>) -> ()
|
||||
return
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ using namespace hlskernel;
|
|||
|
||||
static llvm::cl::opt<std::string>
|
||||
benchmarkType("type", llvm::cl::desc("Benchmark type"),
|
||||
llvm::cl::value_desc("cnn/image"), llvm::cl::init("cnn"));
|
||||
llvm::cl::value_desc("cnn/blas/isp"), llvm::cl::init("cnn"));
|
||||
|
||||
static llvm::cl::opt<std::string>
|
||||
configFilename("config", llvm::cl::desc("Configuration filename"),
|
||||
|
@ -41,31 +41,174 @@ static llvm::cl::opt<std::string>
|
|||
outputFilename("o", llvm::cl::desc("Output filename"),
|
||||
llvm::cl::value_desc("filename"), llvm::cl::init("-"));
|
||||
|
||||
static LogicalResult benchmarkGen(raw_ostream &os) {
|
||||
MLIRContext context;
|
||||
context.loadDialect<HLSKernelDialect>();
|
||||
auto module = ModuleOp::create(UnknownLoc::get(&context));
|
||||
namespace {
|
||||
/// Class for automatically generating benchmarks.
|
||||
class BenchmarkGenerator {
|
||||
public:
|
||||
explicit BenchmarkGenerator(raw_ostream &os, ModuleOp &module)
|
||||
: os(os), module(module) {}
|
||||
|
||||
raw_ostream &os;
|
||||
ModuleOp &module;
|
||||
|
||||
/// Methods for generating various types of benchmarks.
|
||||
LogicalResult genCNN(INIReader config);
|
||||
LogicalResult genBLAS(INIReader config) { return failure(); }
|
||||
LogicalResult genISP(INIReader config) { return failure(); }
|
||||
}; // namespace
|
||||
} // namespace
|
||||
|
||||
/// Currently bypass have not been supported.
|
||||
LogicalResult BenchmarkGenerator::genCNN(INIReader config) {
|
||||
// Parse configuration file.
|
||||
if (config.ParseError())
|
||||
llvm::outs() << "error: cnn configuration file parse fail\n";
|
||||
|
||||
const auto inputChannel = config.GetInteger("config", "inputChannel", 3);
|
||||
const auto inputHeight = config.GetInteger("config", "inputHeight", 224);
|
||||
const auto inputWidth = config.GetInteger("config", "inputWidth", 224);
|
||||
const auto outputChannel = config.GetInteger("config", "outputChannel", 1000);
|
||||
|
||||
const auto batchSize = config.GetInteger("config", "batchSize", 1);
|
||||
const auto minChannel = config.GetInteger("config", "minChannel", 64);
|
||||
const auto maxChannel = config.GetInteger("config", "maxChannel", 512);
|
||||
const auto poolingNumber = config.GetInteger("config", "poolingNumber", 5);
|
||||
// const auto bypassNumber = config.GetInteger("config", "bypassNumber", 0);
|
||||
|
||||
// Create a new builder in the target module.
|
||||
OpBuilder builder(module.getBodyRegion());
|
||||
auto loc = module.getLoc();
|
||||
std::srand(std::time(nullptr));
|
||||
|
||||
// Helpers.
|
||||
auto getMemType = [&](std::initializer_list<int64_t> shape) {
|
||||
return MemRefType::get(shape, builder.getF32Type());
|
||||
};
|
||||
|
||||
auto getKernelShape = [&]() { return std::rand() % 3 * 2 + 3; };
|
||||
|
||||
auto getChannel = [&](int current) {
|
||||
if (std::rand() % 4 == 0 && current < maxChannel)
|
||||
return current * 2;
|
||||
else
|
||||
return current;
|
||||
};
|
||||
|
||||
auto getPoolingFlag = [&](int current) {
|
||||
if ((std::rand() % 4 == 0 || current == 0) && current < poolingNumber)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
};
|
||||
|
||||
// Generate function signature and create a new function.
|
||||
SmallVector<mlir::Type, 2> inputTypes;
|
||||
inputTypes.push_back(
|
||||
getMemType({batchSize, inputChannel, inputHeight, inputWidth}));
|
||||
inputTypes.push_back(getMemType({outputChannel}));
|
||||
SmallVector<mlir::Type, 2> outputTypes;
|
||||
|
||||
auto func = builder.create<FuncOp>(
|
||||
loc, "auto_gen_cnn", builder.getFunctionType(inputTypes, outputTypes));
|
||||
func.addEntryBlock();
|
||||
builder.setInsertionPointToStart(&func.front());
|
||||
|
||||
// Initialize status registers.
|
||||
int poolingCount = 0;
|
||||
bool poolingFlag = getPoolingFlag(poolingCount);
|
||||
int kernelShape = getKernelShape();
|
||||
int padding = (kernelShape - 1) / 2;
|
||||
|
||||
int topChannel = inputChannel;
|
||||
int topHeight = inputHeight;
|
||||
int topWidth = inputWidth;
|
||||
|
||||
int btmChannel = minChannel;
|
||||
int btmHeight = poolingFlag ? topHeight / 2 : topHeight;
|
||||
int btmWidth = poolingFlag ? topWidth / 2 : topWidth;
|
||||
|
||||
// Memory references.
|
||||
SmallVector<mlir::Value, 32> fmaps;
|
||||
SmallVector<mlir::Value, 32> kernels;
|
||||
SmallVector<mlir::Value, 32> biases;
|
||||
fmaps.push_back(func.getArgument(0));
|
||||
|
||||
// Generate CNN model.
|
||||
while (poolingCount < poolingNumber || btmChannel < maxChannel) {
|
||||
// Create convolutional layer.
|
||||
fmaps.push_back(builder.create<mlir::AllocOp>(
|
||||
loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
|
||||
kernels.push_back(builder.create<mlir::AllocOp>(
|
||||
loc, getMemType({btmChannel, topChannel, kernelShape, kernelShape})));
|
||||
biases.push_back(
|
||||
builder.create<mlir::AllocOp>(loc, getMemType({btmChannel})));
|
||||
|
||||
builder.create<ConvOp>(
|
||||
loc, *std::prev(fmaps.end(), 2), kernels.back(), biases.back(),
|
||||
fmaps.back(), builder.getI64ArrayAttr({1, 1}),
|
||||
builder.getI64ArrayAttr({padding, padding, padding, padding}));
|
||||
|
||||
// Create ReLU layer.
|
||||
fmaps.push_back(builder.create<mlir::AllocOp>(
|
||||
loc, getMemType({batchSize, btmChannel, topHeight, topWidth})));
|
||||
builder.create<ReluOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back());
|
||||
|
||||
// Create max pooling layer if applied.
|
||||
if (poolingFlag) {
|
||||
fmaps.push_back(builder.create<mlir::AllocOp>(
|
||||
loc, getMemType({batchSize, btmChannel, btmHeight, btmWidth})));
|
||||
builder.create<MaxPoolOp>(loc, *std::prev(fmaps.end(), 2), fmaps.back(),
|
||||
builder.getI64ArrayAttr({2, 2}),
|
||||
builder.getI64ArrayAttr({2, 2}),
|
||||
builder.getI64ArrayAttr({0, 0, 0, 0}));
|
||||
}
|
||||
|
||||
// Update status registers.
|
||||
poolingCount = poolingFlag ? poolingCount + 1 : poolingCount;
|
||||
poolingFlag = getPoolingFlag(poolingCount);
|
||||
kernelShape = getKernelShape();
|
||||
padding = (kernelShape - 1) / 2;
|
||||
|
||||
topChannel = btmChannel;
|
||||
topHeight = btmHeight;
|
||||
topWidth = btmWidth;
|
||||
|
||||
btmChannel = getChannel(topChannel);
|
||||
btmHeight = poolingFlag ? topHeight / 2 : topHeight;
|
||||
btmWidth = poolingFlag ? topWidth / 2 : topWidth;
|
||||
}
|
||||
|
||||
// TODO: Create the last dense layer.
|
||||
|
||||
builder.create<mlir::ReturnOp>(loc);
|
||||
|
||||
os << module << "\n";
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Create a fresh MLIR context and module, then dispatch to the generator
/// selected by the "type" command line option ("cnn", "blas" or "isp").
///
/// The configuration file named by the "config" option is handed to the
/// selected generator. Returns the generator's result, or failure() when the
/// benchmark type is unknown.
static LogicalResult processBenchmarkGen(raw_ostream &os) {
  // Create a new MLIR context and module.
  MLIRContext context;
  context.loadDialect<StandardOpsDialect, HLSKernelDialect>();
  auto module = ModuleOp::create(UnknownLoc::get(&context));
  BenchmarkGenerator generator(os, module);

  // Generate corresponding benchmark. The configuration is only read once a
  // known benchmark type has been selected.
  if (benchmarkType == "cnn") {
    INIReader config(configFilename);
    return generator.genCNN(config);
  }
  if (benchmarkType == "blas") {
    INIReader config(configFilename);
    return generator.genBLAS(config);
  }
  if (benchmarkType == "isp") {
    INIReader config(configFilename);
    // ISP benchmarks have their own generator; do not route them to genBLAS.
    return generator.genISP(config);
  }

  // Unknown benchmark type.
  return failure();
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
@ -87,7 +230,8 @@ int main(int argc, char **argv) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
if (failed(benchmarkGen(output->os()))) {
|
||||
// Process benchmark generation.
|
||||
if (failed(processBenchmarkGen(output->os()))) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue