[Samples] add array-partition into ablation study; [ArrayPartition] support AffineStoreOp; bug fixes in EmitHLSCpp and ConvertToHLSCpp
This commit is contained in:
parent
6a504e576a
commit
cbadc8f831
|
@ -46,12 +46,12 @@ $ scalehls-opt -qor-estimation test/Analysis/QoREstimation/test_for.mlir
|
|||
```
|
||||
|
||||
### 4. Ablation study
|
||||
If Vivado HLS (2019.1 tested) is installed on your machine, running the following script will report the HLS results for some benchmarks (around 2 hours on AMD Ryzen7 3800X for all 16 tests).
|
||||
If Vivado HLS (2019.1 tested) is installed on your machine, running the following script will report the HLS results for some benchmarks (around 8 hours on AMD Ryzen7 3800X for all 33 tests).
|
||||
|
||||
For the `ablation_test_run.sh` script, `-n` determines the number of tests to be processed, the maximum supported value of which is 16; `-c` determines whether to run Vivado HLS C synthesis; `-r` determines whether to run report generation. The generated C++ source code will be written to `sample/cpp_src`; the Vivado HLS project will be established in `sample/hls_proj`; the generated report will be written to `sample/test_results`.
|
||||
For the `ablation_test_run.sh` script, `-n` sets the number of tests to be processed (the maximum supported value is 33); `-c` sets the index of the first test from which the C++ synthesis and report collection are rerun. The generated C++ source code will be written to `sample/cpp_src`; the Vivado HLS project will be established in `sample/hls_proj`; the collected report will be written to `sample/test_results`; the test summary will be written to `sample`.
|
||||
```sh
|
||||
$ cd $SCALEHLS_DIR/sample
|
||||
$ ./ablation_test_run.sh -n 16 -c true -r true
|
||||
$ ./ablation_test_run.sh -n 33 -c 0
|
||||
```
|
||||
|
||||
## References
|
||||
|
|
|
@ -80,9 +80,14 @@ void ConvertToHLSCpp::runOnOperation() {
|
|||
// Set array pragma attributes, default array instance is ram_1p
|
||||
// bram. Other attributes are not set here since they require more
|
||||
// analysis to be determined.
|
||||
arrayOp.setAttr("interface", builder.getBoolAttr(false));
|
||||
arrayOp.setAttr("storage", builder.getBoolAttr(false));
|
||||
arrayOp.setAttr("partition", builder.getBoolAttr(false));
|
||||
if (!arrayOp.getAttr("interface"))
|
||||
arrayOp.setAttr("interface", builder.getBoolAttr(false));
|
||||
|
||||
if (!arrayOp.getAttr("storage"))
|
||||
arrayOp.setAttr("storage", builder.getBoolAttr(false));
|
||||
|
||||
if (!arrayOp.getAttr("partition"))
|
||||
arrayOp.setAttr("partition", builder.getBoolAttr(false));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -92,9 +97,14 @@ void ConvertToHLSCpp::runOnOperation() {
|
|||
forOp.emitError("has zero or more than one basic blocks");
|
||||
|
||||
// Set loop pragma attributes.
|
||||
forOp.setAttr("pipeline", builder.getBoolAttr(false));
|
||||
forOp.setAttr("unroll", builder.getBoolAttr(false));
|
||||
forOp.setAttr("flatten", builder.getBoolAttr(false));
|
||||
if (!forOp.getAttr("pipeline"))
|
||||
forOp.setAttr("pipeline", builder.getBoolAttr(false));
|
||||
|
||||
if (!forOp.getAttr("unroll"))
|
||||
forOp.setAttr("unroll", builder.getBoolAttr(false));
|
||||
|
||||
if (!forOp.getAttr("flatten"))
|
||||
forOp.setAttr("flatten", builder.getBoolAttr(false));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -284,7 +284,7 @@ bool HLSKernelVisitor::visitOp(MaxPoolOp op) {
|
|||
auto dataType = O.getType().cast<MemRefType>().getElementType();
|
||||
auto zeroConst = builder.create<mlir::ConstantOp>(
|
||||
op.getLoc(), builder.getZeroAttr(dataType));
|
||||
createStore(zeroConst, O, {h, c, h, w});
|
||||
createStore(zeroConst, O, {n, c, h, w});
|
||||
|
||||
// Create kernel height, and kernel width loop.
|
||||
auto r = createLoop(0, kernelShape[0]);
|
||||
|
@ -308,7 +308,7 @@ bool HLSKernelVisitor::visitOp(MaxPoolOp op) {
|
|||
// Carry out selection and store the greater value.
|
||||
auto newGreatest = builder.create<mlir::SelectOp>(op.getLoc(), greaterThanTmp,
|
||||
fmap, tmpGreatest);
|
||||
createStore(newGreatest, O, {h, c, h, w});
|
||||
createStore(newGreatest, O, {n, c, h, w});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1195,7 +1195,7 @@ void ModuleEmitter::emitAssign(AssignOp *op) {
|
|||
}
|
||||
|
||||
void ModuleEmitter::emitArray(ArrayOp *op) {
|
||||
// addAlias(op->getOperand(), op->getResult());
|
||||
addAlias(op->getOperand(), op->getResult());
|
||||
|
||||
if (op->interface()) {
|
||||
|
||||
|
|
|
@ -20,13 +20,107 @@ struct ArrayPartition : public ArrayPartitionBase<ArrayPartition> {
|
|||
};
|
||||
} // namespace
|
||||
|
||||
/// Returns the last loop visited by `root.walk` whose boolean "pipeline"
/// attribute is present and set to true.
///
/// If no loop reached by the walk carries `pipeline = true`, a null
/// AffineForOp handle is returned instead of reading the back of an empty
/// container (which the previous implementation did). Callers should test the
/// result before using it.
static mlir::AffineForOp getPipelineLoop(mlir::AffineForOp root) {
  // Only the most recently visited match is ever needed, so keep a single
  // handle rather than collecting every match.
  mlir::AffineForOp lastPipelined;

  root.walk([&](mlir::AffineForOp loop) {
    auto attr = loop.getAttrOfType<BoolAttr>("pipeline");
    if (attr && attr.getValue())
      lastPipelined = loop;
  });

  return lastPipelined;
}
|
||||
|
||||
template <typename OpType>
|
||||
static void applyArrayPartition(MemAccessDict &accessDict, OpBuilder &builder) {
|
||||
for (auto pair : accessDict) {
|
||||
auto arrayOp = cast<ArrayOp>(pair.first);
|
||||
auto arrayType = arrayOp.getType().cast<MemRefType>();
|
||||
auto arrayAccesses = pair.second;
|
||||
|
||||
// Walk through each dimension of the targeted array.
|
||||
SmallVector<Attribute, 4> partitionFactor;
|
||||
SmallVector<StringRef, 4> partitionType;
|
||||
|
||||
for (size_t dim = 0, e = arrayType.getShape().size(); dim < e; ++dim) {
|
||||
// Collect all array access indices of the current dimension.
|
||||
SmallVector<AffineExpr, 4> indices;
|
||||
for (auto accessOp : arrayAccesses) {
|
||||
auto concreteOp = cast<OpType>(accessOp);
|
||||
auto index = concreteOp.getAffineMap().getResult(dim);
|
||||
// Only add unique index.
|
||||
if (std::find(indices.begin(), indices.end(), index) == indices.end())
|
||||
indices.push_back(index);
|
||||
}
|
||||
auto accessNum = indices.size();
|
||||
|
||||
// Find the max array access distance in the current block.
|
||||
unsigned maxDistance = 0;
|
||||
bool failFlag = false;
|
||||
|
||||
for (unsigned i = 0; i < accessNum; ++i) {
|
||||
for (unsigned j = i + 1; j < accessNum; ++j) {
|
||||
// TODO: this expression can't be simplified.
|
||||
auto expr = indices[j] - indices[i];
|
||||
|
||||
if (auto constDistance = expr.dyn_cast<AffineConstantExpr>()) {
|
||||
unsigned distance = abs(constDistance.getValue());
|
||||
maxDistance = max(maxDistance, distance);
|
||||
} else {
|
||||
// The array partition mechanism will fail if the distance is
|
||||
// not a constant number.
|
||||
// failFlag = true;
|
||||
// break;
|
||||
}
|
||||
}
|
||||
// if (failFlag)
|
||||
// break;
|
||||
}
|
||||
|
||||
// Determine array partition strategy.
|
||||
maxDistance += 1;
|
||||
if (failFlag || maxDistance == 1) {
|
||||
// This means all accesses have the same index, and this dimension
|
||||
// should not be partitioned.
|
||||
partitionType.push_back("none");
|
||||
partitionFactor.push_back(builder.getUI32IntegerAttr(1));
|
||||
|
||||
} else if (accessNum >= maxDistance) {
|
||||
// This means some elements are accessed more than once or exactly
|
||||
// once, and successive elements are accessed. In most cases,
|
||||
// apply "cyclic" partition should be the best solution.
|
||||
partitionType.push_back("cyclic");
|
||||
partitionFactor.push_back(builder.getUI32IntegerAttr(maxDistance));
|
||||
|
||||
} else {
|
||||
// This means discrete elements are accessed. Typically, "block"
|
||||
// partition will be most benefit for this occasion.
|
||||
partitionType.push_back("block");
|
||||
partitionFactor.push_back(builder.getUI32IntegerAttr(accessNum));
|
||||
}
|
||||
}
|
||||
|
||||
arrayOp.setAttr("partition", builder.getBoolAttr(true));
|
||||
arrayOp.setAttr("partition_type", builder.getStrArrayAttr(partitionType));
|
||||
arrayOp.setAttr("partition_factor", builder.getArrayAttr(partitionFactor));
|
||||
}
|
||||
}
|
||||
|
||||
void ArrayPartition::runOnOperation() {
|
||||
auto module = getOperation();
|
||||
auto builder = OpBuilder(module);
|
||||
|
||||
// Extract all static parameters and current pragma configurations.
|
||||
HLSCppAnalyzer analyzer(builder);
|
||||
analyzer.analyzeModule(getOperation());
|
||||
// If the current loop is annotated as pipeline, all inner loops are
|
||||
// automatically unrolled.
|
||||
for (auto func : module.getOps<FuncOp>()) {
|
||||
for (auto forOp : func.getOps<mlir::AffineForOp>()) {
|
||||
auto outermost = getPipelineLoop(forOp);
|
||||
outermost.walk([&](mlir::AffineForOp loop) {
|
||||
if (loop != outermost)
|
||||
loopUnrollFull(loop);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Canonicalize the analyzed IR.
|
||||
OwningRewritePatternList patterns;
|
||||
|
@ -38,109 +132,27 @@ void ArrayPartition::runOnOperation() {
|
|||
Operation *op = getOperation();
|
||||
applyPatternsAndFoldGreedily(op->getRegions(), std::move(patterns));
|
||||
|
||||
// Estimate performance and resource utilization.
|
||||
// Apply array partition.
|
||||
for (auto func : module.getOps<FuncOp>()) {
|
||||
for (auto forOp : func.getOps<mlir::AffineForOp>()) {
|
||||
// TODO: support imperfect loop nests.
|
||||
SmallVector<mlir::AffineForOp, 4> nestedLoops;
|
||||
getPerfectlyNestedLoops(nestedLoops, forOp);
|
||||
auto innermost = nestedLoops.back();
|
||||
auto outermost = getPipelineLoop(forOp);
|
||||
|
||||
// Collect memory access information.
|
||||
MemAccessDict loadDict;
|
||||
innermost.walk([&](mlir::AffineLoadOp loadOp) {
|
||||
outermost.walk([&](mlir::AffineLoadOp loadOp) {
|
||||
auto arrayOp = cast<ArrayOp>(loadOp.getMemRef().getDefiningOp());
|
||||
loadDict[arrayOp].push_back(loadOp);
|
||||
});
|
||||
|
||||
MemAccessDict storeDict;
|
||||
innermost.walk([&](mlir::AffineStoreOp storeOp) {
|
||||
outermost.walk([&](mlir::AffineStoreOp storeOp) {
|
||||
auto arrayOp = cast<ArrayOp>(storeOp.getMemRef().getDefiningOp());
|
||||
storeDict[arrayOp].push_back(storeOp);
|
||||
});
|
||||
|
||||
// Apply array partition pragma.
|
||||
for (auto pair : loadDict) {
|
||||
auto arrayOp = cast<ArrayOp>(pair.first);
|
||||
auto arrayType = arrayOp.getType().cast<MemRefType>();
|
||||
auto arrayAccesses = pair.second;
|
||||
|
||||
// Walk through each dimension of the targeted array.
|
||||
SmallVector<Attribute, 4> partitionFactor;
|
||||
SmallVector<StringRef, 4> partitionType;
|
||||
|
||||
for (size_t dim = 0, e = arrayType.getShape().size(); dim < e; ++dim) {
|
||||
unsigned dimSize = arrayType.getShape()[dim];
|
||||
|
||||
// Collect all array access indices of the current dimension.
|
||||
SmallVector<AffineExpr, 4> indices;
|
||||
for (auto accessOp : arrayAccesses) {
|
||||
auto concreteOp = cast<mlir::AffineLoadOp>(accessOp);
|
||||
auto index = concreteOp.getAffineMap().getResult(dim);
|
||||
// Only add unique index.
|
||||
if (std::find(indices.begin(), indices.end(), index) ==
|
||||
indices.end())
|
||||
indices.push_back(index);
|
||||
}
|
||||
auto accessNum = indices.size();
|
||||
|
||||
// Find the max array access distance in the current block.
|
||||
unsigned maxDistance = 0;
|
||||
bool failFlag = false;
|
||||
|
||||
for (unsigned i = 0; i < accessNum; ++i) {
|
||||
for (unsigned j = i + 1; j < accessNum; ++j) {
|
||||
// TODO: this expression can't be simplified.
|
||||
auto expr = indices[j] - indices[i];
|
||||
|
||||
if (auto constDistance = expr.dyn_cast<AffineConstantExpr>()) {
|
||||
unsigned distance = abs(constDistance.getValue());
|
||||
maxDistance = max(maxDistance, distance);
|
||||
} else {
|
||||
// The array partition mechanism will fail if the distance is
|
||||
// not a constant number.
|
||||
// failFlag = true;
|
||||
// break;
|
||||
}
|
||||
}
|
||||
// if (failFlag)
|
||||
// break;
|
||||
}
|
||||
|
||||
// Determine array partition strategy.
|
||||
maxDistance += 1;
|
||||
if (failFlag || maxDistance == 1) {
|
||||
// This means all accesses have the same index, and this dimension
|
||||
// should not be partitioned.
|
||||
partitionType.push_back("none");
|
||||
partitionFactor.push_back(builder.getUI32IntegerAttr(1));
|
||||
|
||||
} else if (accessNum == dimSize) {
|
||||
// Apply complete array partition.
|
||||
partitionType.push_back("complete");
|
||||
partitionFactor.push_back(builder.getUI32IntegerAttr(1));
|
||||
|
||||
} else if (accessNum >= maxDistance) {
|
||||
// This means some elements are accessed more than once or exactly
|
||||
// once, and successive elements are accessed. In most cases, apply
|
||||
// "cyclic" partition should be the best solution.
|
||||
partitionType.push_back("cyclic");
|
||||
partitionFactor.push_back(builder.getUI32IntegerAttr(maxDistance));
|
||||
|
||||
} else {
|
||||
// This means discrete elements are accessed. Typically, "block"
|
||||
// partition will be most benefit for this occasion.
|
||||
partitionType.push_back("block");
|
||||
partitionFactor.push_back(builder.getUI32IntegerAttr(accessNum));
|
||||
}
|
||||
}
|
||||
|
||||
arrayOp.setAttr("partition", builder.getBoolAttr(true));
|
||||
arrayOp.setAttr("partition_type",
|
||||
builder.getStrArrayAttr(partitionType));
|
||||
arrayOp.setAttr("partition_factor",
|
||||
builder.getArrayAttr(partitionFactor));
|
||||
}
|
||||
applyArrayPartition<mlir::AffineLoadOp>(loadDict, builder);
|
||||
applyArrayPartition<mlir::AffineStoreOp>(storeDict, builder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Script options.
|
||||
while getopts 'n:c:r:' opt
|
||||
while getopts 'n:c:' opt
|
||||
do
|
||||
case $opt in
|
||||
n) ablation_number=$OPTARG ;;
|
||||
c) rerun_csynth=$OPTARG ;;
|
||||
r) rerun_report=$OPTARG ;;
|
||||
c) rerun_csynth_from=$OPTARG ;;
|
||||
esac
|
||||
done
|
||||
|
||||
|
@ -28,10 +27,13 @@ fi
|
|||
|
||||
# Candidate passes.
|
||||
hta=-hlskernel-to-affine
|
||||
pft=-affine-loop-perfection
|
||||
rvb=-remove-var-loop-bound
|
||||
cth=-convert-to-hlscpp
|
||||
can=-canonicalize
|
||||
|
||||
alp=-affine-loop-perfection
|
||||
rvb=-remove-var-loop-bound
|
||||
par=-array-partition
|
||||
|
||||
p0=-insert-pipeline-pragma="insert-level=0"
|
||||
p1=-insert-pipeline-pragma="insert-level=1"
|
||||
p2=-insert-pipeline-pragma="insert-level=2"
|
||||
|
@ -43,10 +45,15 @@ u3=-affine-loop-unroll="unroll-full unroll-num-reps=3"
|
|||
|
||||
t1s2=-partial-affine-loop-tile="tile-level=1 tile-size=2"
|
||||
t1s4=-partial-affine-loop-tile="tile-level=1 tile-size=4"
|
||||
t1s8=-partial-affine-loop-tile="tile-level=1 tile-size=8"
|
||||
|
||||
t2s2=-partial-affine-loop-tile="tile-level=2 tile-size=2"
|
||||
t2s4=-partial-affine-loop-tile="tile-level=2 tile-size=4"
|
||||
t2s8=-partial-affine-loop-tile="tile-level=2 tile-size=8"
|
||||
|
||||
t3s2=-partial-affine-loop-tile="tile-level=3 tile-size=2"
|
||||
t3s4=-partial-affine-loop-tile="tile-level=3 tile-size=4"
|
||||
t3s8=-partial-affine-loop-tile="tile-level=3 tile-size=8"
|
||||
|
||||
emit=-emit-hlscpp
|
||||
|
||||
|
@ -59,34 +66,63 @@ do
|
|||
do
|
||||
output="cpp_src/${file##*Affine/}.cpp"
|
||||
case $n in
|
||||
0) scalehls-opt $hta $can $file | scalehls-translate $emit -o $output ;;
|
||||
0) scalehls-opt $hta $cth $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply pipeline.
|
||||
1) scalehls-opt $hta "$p0" $can $file | scalehls-translate $emit -o $output ;;
|
||||
2) scalehls-opt $hta "$p1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
3) scalehls-opt $hta "$p2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
4) scalehls-opt $hta "$p3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
1) scalehls-opt $hta $cth "$p0" $can $file | scalehls-translate $emit -o $output ;;
|
||||
2) scalehls-opt $hta $cth "$p1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
3) scalehls-opt $hta $cth "$p2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
4) scalehls-opt $hta $cth "$p3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply loop perfection + pipeline.
|
||||
5) scalehls-opt $hta $pft "$p0" $can $file | scalehls-translate $emit -o $output ;;
|
||||
6) scalehls-opt $hta $pft "$p1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
7) scalehls-opt $hta $pft "$p2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
8) scalehls-opt $hta $pft "$p3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
# Apply pipeline + array partition.
|
||||
5) scalehls-opt $hta $cth "$p0" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
6) scalehls-opt $hta $cth "$p1" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
7) scalehls-opt $hta $cth "$p2" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
8) scalehls-opt $hta $cth "$p3" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply loop perfection + remove variable bound + pipeline.
|
||||
9) scalehls-opt $hta $pft $rvb "$p0" $can $file | scalehls-translate $emit -o $output ;;
|
||||
10) scalehls-opt $hta $pft $rvb "$p1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
11) scalehls-opt $hta $pft $rvb "$p2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
12) scalehls-opt $hta $pft $rvb "$p3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
9) scalehls-opt $hta $alp $rvb $cth "$p0" $can $file | scalehls-translate $emit -o $output ;;
|
||||
10) scalehls-opt $hta $alp $rvb $cth "$p1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
11) scalehls-opt $hta $alp $rvb $cth "$p2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
12) scalehls-opt $hta $alp $rvb $cth "$p3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply loop perfection + remove variable bound + loop tiling + pipeline.
|
||||
13) scalehls-opt $hta $pft $rvb "$t1s4" "$p1" "$u1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
14) scalehls-opt $hta $pft $rvb "$t2s4" "$p2" "$u2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
15) scalehls-opt $hta $pft $rvb "$t3s4" "$p3" "$u3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
# Apply loop perfection + remove variable bound + pipeline + array partition.
|
||||
13) scalehls-opt $hta $alp $rvb $cth "$p0" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
14) scalehls-opt $hta $alp $rvb $cth "$p1" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
15) scalehls-opt $hta $alp $rvb $cth "$p2" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
16) scalehls-opt $hta $alp $rvb $cth "$p3" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply ... + 1st-level loop tiling + pipeline.
|
||||
17) scalehls-opt $hta $alp $rvb "$t1s2" $cth "$p1" "$u1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
18) scalehls-opt $hta $alp $rvb "$t1s4" $cth "$p1" "$u1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
19) scalehls-opt $hta $alp $rvb "$t1s8" $cth "$p1" "$u1" $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply ... + 1st-level loop tiling + pipeline + array partition.
|
||||
20) scalehls-opt $hta $alp $rvb "$t1s2" $cth "$p1" "$u1" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
21) scalehls-opt $hta $alp $rvb "$t1s4" $cth "$p1" "$u1" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
22) scalehls-opt $hta $alp $rvb "$t1s8" $cth "$p1" "$u1" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply ... + 2nd-level loop tiling + pipeline.
|
||||
23) scalehls-opt $hta $alp $rvb "$t2s2" $cth "$p2" "$u2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
24) scalehls-opt $hta $alp $rvb "$t2s4" $cth "$p2" "$u2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
25) scalehls-opt $hta $alp $rvb "$t2s8" $cth "$p2" "$u2" $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply ... + 2nd-level loop tiling + pipeline + array partition.
|
||||
26) scalehls-opt $hta $alp $rvb "$t2s2" $cth "$p2" "$u2" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
27) scalehls-opt $hta $alp $rvb "$t2s4" $cth "$p2" "$u2" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
28) scalehls-opt $hta $alp $rvb "$t2s8" $cth "$p2" "$u2" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply ... + 3rd-level loop tiling + pipeline.
|
||||
29) scalehls-opt $hta $alp $rvb "$t3s2" $cth "$p3" "$u3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
30) scalehls-opt $hta $alp $rvb "$t3s4" $cth "$p3" "$u3" $can $file | scalehls-translate $emit -o $output ;;
|
||||
|
||||
# Apply ... + 3rd-level loop tiling + pipeline + array partition.
|
||||
31) scalehls-opt $hta $cth $alp $rvb "$t3s2" "$p3" "$u3" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
32) scalehls-opt $hta $cth $alp $rvb "$t3s4" "$p3" "$u3" $par $can $file | scalehls-translate $emit -o $output ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ $rerun_csynth == "true" ]
|
||||
if [ $n -ge $rerun_csynth_from ]
|
||||
then
|
||||
# Run HLS synthesis.
|
||||
cd hls_proj
|
||||
|
@ -94,7 +130,7 @@ do
|
|||
cd ..
|
||||
fi
|
||||
|
||||
if [ $rerun_report == "true" ]
|
||||
if [ $n -ge $rerun_csynth_from ]
|
||||
then
|
||||
# Generate latency report.
|
||||
echo -e "benchmark\tdsp\tlut\tcycles" > test_results/test_result$n.log
|
||||
|
@ -138,22 +174,51 @@ do
|
|||
3) echo -e "p2\t\c" >> test_summary.log ;;
|
||||
4) echo -e "p3\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply loop perfection + pipeline.
|
||||
5) echo -e "pft+p0\t\c" >> test_summary.log ;;
|
||||
6) echo -e "pft+p1\t\c" >> test_summary.log ;;
|
||||
7) echo -e "pft+p2\t\c" >> test_summary.log ;;
|
||||
8) echo -e "pft+p3\t\c" >> test_summary.log ;;
|
||||
# Apply pipeline + array partition.
|
||||
5) echo -e "p0+par\t\c" >> test_summary.log ;;
|
||||
6) echo -e "p1+par\t\c" >> test_summary.log ;;
|
||||
7) echo -e "p2+par\t\c" >> test_summary.log ;;
|
||||
8) echo -e "p3+par\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply loop perfection + remove variable bound + pipeline.
|
||||
9) echo -e "pft+rvb+p0\t\c" >> test_summary.log ;;
|
||||
10) echo -e "pft+rvb+p1\t\c" >> test_summary.log ;;
|
||||
11) echo -e "pft+rvb+p2\t\c" >> test_summary.log ;;
|
||||
12) echo -e "pft+rvb+p3\t\c" >> test_summary.log ;;
|
||||
9) echo -e "ar+p0\t\c" >> test_summary.log ;;
|
||||
10) echo -e "ar+p1\t\c" >> test_summary.log ;;
|
||||
11) echo -e "ar+p2\t\c" >> test_summary.log ;;
|
||||
12) echo -e "ar+p3\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply loop perfection + remove variable bound + loop tiling + pipeline.
|
||||
13) echo -e "pft+rvb+t1+p1\t\c" >> test_summary.log ;;
|
||||
14) echo -e "pft+rvb+t2+p2\t\c" >> test_summary.log ;;
|
||||
15) echo -e "pft+rvb+t3+p3\t\c" >> test_summary.log ;;
|
||||
# Apply loop perfection + remove variable bound + pipeline + array partition.
|
||||
13) echo -e "ar+p0+par\t\c" >> test_summary.log ;;
|
||||
14) echo -e "ar+p1+par\t\c" >> test_summary.log ;;
|
||||
15) echo -e "ar+p2+par\t\c" >> test_summary.log ;;
|
||||
16) echo -e "ar+p3+par\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply ... + 1st-level loop tiling + pipeline.
|
||||
17) echo -e "ar+t1s2+p1\t\c" >> test_summary.log ;;
|
||||
18) echo -e "ar+t1s4+p1\t\c" >> test_summary.log ;;
|
||||
19) echo -e "ar+t1s8+p1\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply ... + 1st-level loop tiling + pipeline + array partition.
|
||||
20) echo -e "ar+t1s2+p1+par\t\c" >> test_summary.log ;;
|
||||
21) echo -e "ar+t1s4+p1+par\t\c" >> test_summary.log ;;
|
||||
22) echo -e "ar+t1s8+p1+par\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply ... + 2nd-level loop tiling + pipeline.
|
||||
23) echo -e "ar+t2s2+p2\t\c" >> test_summary.log ;;
|
||||
24) echo -e "ar+t2s4+p2\t\c" >> test_summary.log ;;
|
||||
25) echo -e "ar+t2s8+p2\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply ... + 2nd-level loop tiling + pipeline + array partition.
|
||||
26) echo -e "ar+t2s2+p2+par\t\c" >> test_summary.log ;;
|
||||
27) echo -e "ar+t2s4+p2+par\t\c" >> test_summary.log ;;
|
||||
28) echo -e "ar+t2s8+p2+par\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply ... + 3rd-level loop tiling + pipeline.
|
||||
29) echo -e "ar+t3s2+p3\t\c" >> test_summary.log ;;
|
||||
30) echo -e "ar+t3s4+p3\t\c" >> test_summary.log ;;
|
||||
|
||||
# Apply ... + 3rd-level loop tiling + pipeline + array partition.
|
||||
31) echo -e "ar+t3s2+p3+par\t\c" >> test_summary.log ;;
|
||||
32) echo -e "ar+t3s4+p3+par\t\c" >> test_summary.log ;;
|
||||
esac
|
||||
|
||||
cat $result | awk "NR==$idx{OFS=\"\t\";print \$2,\$3,\$4}" >> test_summary.log
|
||||
|
|
Loading…
Reference in New Issue