[BenchmarkGen] Support generating the last dense layer, and implement the corresponding affine lowering (#15)
This commit is contained in:
parent
e5cd6673e7
commit
192e3e3fd7
|
@ -34,10 +34,14 @@ After the installation and test successfully completed, you should be able to pl
|
|||
```sh
|
||||
$ export PATH=$SCALEHLS_DIR/build/bin:$PATH
|
||||
$ cd $SCALEHLS_DIR
|
||||
$
|
||||
$ benchmark-gen -type "cnn" -config "$SCALEHLS_DIR/config/cnn-config.ini" -number 1
|
||||
$ scalehls-opt -hlskernel-to-affine test/Conversion/HLSKernelToAffine/test_*.mlir
|
||||
$
|
||||
$ scalehls-opt -convert-to-hlscpp test/Conversion/ConvertToHLSCpp/test_*.mlir
|
||||
$ scalehls-opt -convert-to-hlscpp test/EmitHLSCpp/test_*.mlir | scalehls-translate -emit-hlscpp
|
||||
$
|
||||
$ scalehls-opt -qor-estimation test/Analysis/QoREstimation/test_for.mlir
|
||||
$ scalehls-opt -hlskernel-to-affine test/Conversion/HLSKernelToAffine/test_*.mlir
|
||||
```
|
||||
|
||||
## References
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
[config]
|
||||
# input and output information
|
||||
inputChannel=3
|
||||
inputHeight=224
|
||||
inputWidth=224
|
||||
outputChannel=1000
|
||||
inputHeight=32
|
||||
inputWidth=32
|
||||
outputChannel=10
|
||||
|
||||
# Generation rules
|
||||
batchSize=1
|
||||
minChannel=64
|
||||
maxChannel=512
|
||||
poolingNumber=5
|
||||
minChannel=8
|
||||
maxChannel=64
|
||||
poolingNumber=3
|
||||
bypassNumber=0
|
||||
|
|
|
@ -10,8 +10,8 @@ def DenseOp : HLSKernelOp<"dense", [HLSKernelOpInterface]> {
|
|||
let description = [{
|
||||
Dense layer (fully-connected layer).
|
||||
|
||||
I: (N, C)
|
||||
K: (F, C)
|
||||
I: (N, C) or (N, C, H, W)
|
||||
K: (F, C) or (F, C, H, W)
|
||||
B: (F)
|
||||
O: (N, F)
|
||||
}];
|
||||
|
|
|
@ -135,7 +135,7 @@ bool HLSKernelVisitor::visitOp(DenseOp op) {
|
|||
auto B = op.getOperand(2);
|
||||
auto O = op.getOperand(3);
|
||||
|
||||
auto KShape = K.getType().cast<MemRefType>().getShape();
|
||||
auto IShape = I.getType().cast<MemRefType>().getShape();
|
||||
auto OShape = O.getType().cast<MemRefType>().getShape();
|
||||
|
||||
// Set insertion point of builder.
|
||||
|
@ -145,24 +145,42 @@ bool HLSKernelVisitor::visitOp(DenseOp op) {
|
|||
auto n = createLoop(0, OShape[0]);
|
||||
|
||||
// Create output channel loop.
|
||||
auto f = createLoop(0, KShape[0]);
|
||||
auto f = createLoop(0, OShape[1]);
|
||||
|
||||
// Load bias into O array.
|
||||
auto bias = createLoad(B, {f});
|
||||
createStore(bias, O, {n, f});
|
||||
|
||||
// Create input channel loop.
|
||||
auto c = createLoop(0, KShape[1]);
|
||||
auto c = createLoop(0, IShape[1]);
|
||||
|
||||
// Fetch feature map, kernel and carry out multiplication.
|
||||
auto fmap = createLoad(I, {n, c});
|
||||
auto kernel = createLoad(K, {f, c});
|
||||
auto mult = createBinaryOp<mlir::MulFOp>(fmap, kernel);
|
||||
if (IShape.size() == 2) {
|
||||
// Fetch feature map, kernel and carry out multiplication.
|
||||
auto fmap = createLoad(I, {n, c});
|
||||
auto kernel = createLoad(K, {f, c});
|
||||
auto mult = createBinaryOp<mlir::MulFOp>(fmap, kernel);
|
||||
|
||||
// Fetch partial result and carry out accumulation.
|
||||
auto partial = createLoad(O, {n, f});
|
||||
auto accum = createBinaryOp<mlir::AddFOp>(partial, mult);
|
||||
createStore(accum, O, {n, f});
|
||||
// Fetch partial result and carry out accumulation.
|
||||
auto partial = createLoad(O, {n, f});
|
||||
auto accum = createBinaryOp<mlir::AddFOp>(partial, mult);
|
||||
createStore(accum, O, {n, f});
|
||||
} else {
|
||||
// Create kernel height loop.
|
||||
auto r = createLoop(0, IShape[2]);
|
||||
|
||||
// Create kernel width loop.
|
||||
auto s = createLoop(0, IShape[3]);
|
||||
|
||||
// Fetch feature map, kernel and carry out multiplication.
|
||||
auto fmap = createLoad(I, {n, c, r, s});
|
||||
auto kernel = createLoad(K, {f, c, r, s});
|
||||
auto mult = createBinaryOp<mlir::MulFOp>(fmap, kernel);
|
||||
|
||||
// Fetch partial result and carry out accumulation.
|
||||
auto partial = createLoad(O, {n, f});
|
||||
auto accum = createBinaryOp<mlir::AddFOp>(partial, mult);
|
||||
createStore(accum, O, {n, f});
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -105,7 +105,7 @@ LogicalResult BenchmarkGenerator::genCNN(INIReader config) {
|
|||
SmallVector<mlir::Type, 2> inputTypes;
|
||||
inputTypes.push_back(
|
||||
getMemType({batchSize, inputChannel, inputHeight, inputWidth}));
|
||||
inputTypes.push_back(getMemType({outputChannel}));
|
||||
inputTypes.push_back(getMemType({batchSize, outputChannel}));
|
||||
SmallVector<mlir::Type, 2> outputTypes;
|
||||
|
||||
auto func = builder.create<FuncOp>(
|
||||
|
@ -178,10 +178,17 @@ LogicalResult BenchmarkGenerator::genCNN(INIReader config) {
|
|||
btmWidth = poolingFlag ? topWidth / 2 : topWidth;
|
||||
}
|
||||
|
||||
// TODO: Create the last dense layer.
|
||||
// Create the last dense layer.
|
||||
fmaps.push_back(func.getArgument(1));
|
||||
kernels.push_back(builder.create<mlir::AllocOp>(
|
||||
loc, getMemType({outputChannel, topChannel, topHeight, topWidth})));
|
||||
biases.push_back(
|
||||
builder.create<mlir::AllocOp>(loc, getMemType({outputChannel})));
|
||||
|
||||
builder.create<DenseOp>(loc, *std::prev(fmaps.end(), 2), kernels.back(),
|
||||
biases.back(), fmaps.back());
|
||||
|
||||
builder.create<mlir::ReturnOp>(loc);
|
||||
|
||||
os << module << "\n";
|
||||
return success();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue