[LegalizeDataflow] update impl structure and fix bugs, support resnet18 legalization; [EmitHLSCpp] emit bool rather than ap_int<1>
This commit is contained in:
parent
fef0cdc3fe
commit
9652bee260
|
@ -2,6 +2,7 @@
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
|
|
||||||
build
|
build
|
||||||
|
tmp
|
||||||
samples/hls_proj
|
samples/hls_proj
|
||||||
samples/cpp_src
|
samples/cpp_src
|
||||||
samples/test_results
|
samples/test_results
|
||||||
|
|
|
@ -116,17 +116,12 @@ hlscpp::ArrayOp getArrayOp(Value memref);
|
||||||
|
|
||||||
hlscpp::ArrayOp getArrayOp(Operation *op);
|
hlscpp::ArrayOp getArrayOp(Operation *op);
|
||||||
|
|
||||||
// For storing all accessed memrefs indexed by an operation (e.g. AffineForOp).
|
// For storing the intermediate memory and successor loops indexed by the
|
||||||
using MemRefs = SmallVector<Value, 4>;
|
// predecessor loop.
|
||||||
using MemRefsMap = DenseMap<Operation *, MemRefs>;
|
using Successors = SmallVector<std::pair<Value, Operation *>, 2>;
|
||||||
|
using SuccessorsMap = DenseMap<Operation *, Successors>;
|
||||||
|
|
||||||
/// With the generated MemRefsMap, given a specific loop, we can easily find all
|
void getSuccessorsMap(Block &block, SuccessorsMap &map);
|
||||||
/// memories which are consumed by the loop.
|
|
||||||
void getLoopLoadMemsMap(Block &block, MemRefsMap &map);
|
|
||||||
|
|
||||||
/// With the generated MemAccessesMap, given a specific memory, we can easily
|
|
||||||
/// find the loops which produce data to the memory.
|
|
||||||
void getLoopMemStoresMap(Block &block, MemAccessesMap &map);
|
|
||||||
|
|
||||||
} // namespace scalehls
|
} // namespace scalehls
|
||||||
} // namespace mlir
|
} // namespace mlir
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include "Analysis/Utils.h"
|
#include "Analysis/Utils.h"
|
||||||
#include "mlir/Analysis/AffineAnalysis.h"
|
#include "mlir/Analysis/AffineAnalysis.h"
|
||||||
|
#include "llvm/ADT/SmallPtrSet.h"
|
||||||
|
|
||||||
using namespace mlir;
|
using namespace mlir;
|
||||||
using namespace scalehls;
|
using namespace scalehls;
|
||||||
|
@ -140,41 +141,34 @@ hlscpp::ArrayOp scalehls::getArrayOp(Operation *op) {
|
||||||
return getArrayOp(MemRefAccess(op).memref);
|
return getArrayOp(MemRefAccess(op).memref);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// With the generated MemRefsMap, given a specific loop, we can easily find all
|
void scalehls::getSuccessorsMap(Block &block, SuccessorsMap &map) {
|
||||||
/// memories which are consumed by the loop.
|
DenseMap<Operation *, SmallPtrSet<Value, 2>> memsMap;
|
||||||
void scalehls::getLoopLoadMemsMap(Block &block, MemRefsMap &map) {
|
DenseMap<Value, SmallPtrSet<Operation *, 2>> loopsMap;
|
||||||
for (auto loop : block.getOps<AffineForOp>()) {
|
|
||||||
loop.walk([&](Operation *op) {
|
|
||||||
if (auto affineLoad = dyn_cast<AffineLoadOp>(op)) {
|
|
||||||
auto &mems = map[loop];
|
|
||||||
if (std::find(mems.begin(), mems.end(), affineLoad.getMemRef()) ==
|
|
||||||
mems.end())
|
|
||||||
mems.push_back(affineLoad.getMemRef());
|
|
||||||
|
|
||||||
} else if (auto load = dyn_cast<LoadOp>(op)) {
|
for (auto loop : block.getOps<AffineForOp>())
|
||||||
auto &mems = map[loop];
|
|
||||||
if (std::find(mems.begin(), mems.end(), load.getMemRef()) == mems.end())
|
|
||||||
mems.push_back(load.getMemRef());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// With the generated MemAccessesMap, given a specific memory, we can easily
|
|
||||||
/// find the loops which produce data to the memory.
|
|
||||||
void scalehls::getLoopMemStoresMap(Block &block, MemAccessesMap &map) {
|
|
||||||
for (auto loop : block.getOps<AffineForOp>()) {
|
|
||||||
loop.walk([&](Operation *op) {
|
loop.walk([&](Operation *op) {
|
||||||
if (auto affineStore = dyn_cast<AffineStoreOp>(op)) {
|
if (auto affineStore = dyn_cast<AffineStoreOp>(op)) {
|
||||||
auto &loops = map[affineStore.getMemRef()];
|
memsMap[loop].insert(affineStore.getMemRef());
|
||||||
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
|
|
||||||
loops.push_back(loop);
|
|
||||||
|
|
||||||
} else if (auto store = dyn_cast<StoreOp>(op)) {
|
} else if (auto store = dyn_cast<StoreOp>(op)) {
|
||||||
auto &loops = map[store.getMemRef()];
|
memsMap[loop].insert(store.getMemRef());
|
||||||
if (std::find(loops.begin(), loops.end(), loop) == loops.end())
|
|
||||||
loops.push_back(loop);
|
} else if (auto affineLoad = dyn_cast<AffineLoadOp>(op)) {
|
||||||
|
loopsMap[affineLoad.getMemRef()].insert(loop);
|
||||||
|
|
||||||
|
} else if (auto load = dyn_cast<LoadOp>(op)) {
|
||||||
|
loopsMap[load.getMemRef()].insert(loop);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
for (auto loop : block.getOps<AffineForOp>())
|
||||||
|
for (auto mem : memsMap[loop])
|
||||||
|
for (auto successor : loopsMap[mem]) {
|
||||||
|
// If the successor loop not only loads from the memory, but also stores
|
||||||
|
// to the memory, it will not be considered as a legal successor.
|
||||||
|
if (successor == loop || memsMap[successor].count(mem))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
map[loop].push_back(std::pair<Value, Operation *>(mem, successor));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1347,10 +1347,14 @@ void ModuleEmitter::emitValue(Value val, unsigned rank, bool isPtr) {
|
||||||
else if (valType.isa<IndexType>())
|
else if (valType.isa<IndexType>())
|
||||||
os << "int ";
|
os << "int ";
|
||||||
else if (auto intType = valType.dyn_cast<IntegerType>()) {
|
else if (auto intType = valType.dyn_cast<IntegerType>()) {
|
||||||
os << "ap_";
|
if (intType.getWidth() == 1)
|
||||||
if (intType.getSignedness() == IntegerType::SignednessSemantics::Unsigned)
|
os << "bool ";
|
||||||
os << "u";
|
else {
|
||||||
os << "int<" << intType.getWidth() << "> ";
|
os << "ap_";
|
||||||
|
if (intType.getSignedness() == IntegerType::SignednessSemantics::Unsigned)
|
||||||
|
os << "u";
|
||||||
|
os << "int<" << intType.getWidth() << "> ";
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
emitError(val.getDefiningOp(), "has unsupported type.");
|
emitError(val.getDefiningOp(), "has unsupported type.");
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,7 @@ void LegalizeDataflow::runOnOperation() {
|
||||||
dataflowLevel = max(dataflowLevel, attr.getInt());
|
dataflowLevel = max(dataflowLevel, attr.getInt());
|
||||||
else
|
else
|
||||||
op->emitError(
|
op->emitError(
|
||||||
"HLSKernelOp has unexpected predecessor, legalization failed");
|
"HLSKernelOp has unexpected successor, legalization failed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,69 +86,59 @@ void LegalizeDataflow::runOnOperation() {
|
||||||
// this point. Therefore, HLSKernel ops and loops will never have dependencies
|
// this point. Therefore, HLSKernel ops and loops will never have dependencies
|
||||||
// with each other in this pass.
|
// with each other in this pass.
|
||||||
// TODO: analyze live ins.
|
// TODO: analyze live ins.
|
||||||
MemRefsMap loadMemsMap;
|
SuccessorsMap successorsMap;
|
||||||
MemAccessesMap memStoresMap;
|
getSuccessorsMap(func.front(), successorsMap);
|
||||||
getLoopLoadMemsMap(func.front(), loadMemsMap);
|
|
||||||
getLoopMemStoresMap(func.front(), memStoresMap);
|
|
||||||
|
|
||||||
for (auto loop : func.front().getOps<mlir::AffineForOp>()) {
|
for (auto it = func.front().rbegin(); it != func.front().rend(); ++it) {
|
||||||
int64_t dataflowLevel = 0;
|
if (auto loop = dyn_cast<mlir::AffineForOp>(*it)) {
|
||||||
for (auto mem : loadMemsMap[loop]) {
|
int64_t dataflowLevel = 0;
|
||||||
for (auto predLoop : memStoresMap[mem]) {
|
|
||||||
if (predLoop == loop)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Establish an ASAP dataflow schedule.
|
// Walk through all successor loops.
|
||||||
if (auto attr = predLoop->getAttrOfType<IntegerAttr>("dataflow_level"))
|
for (auto pair : successorsMap[loop]) {
|
||||||
|
auto successor = pair.second;
|
||||||
|
if (auto attr = successor->getAttrOfType<IntegerAttr>("dataflow_level"))
|
||||||
dataflowLevel = max(dataflowLevel, attr.getInt());
|
dataflowLevel = max(dataflowLevel, attr.getInt());
|
||||||
else
|
else {
|
||||||
loop.emitError(
|
loop.emitError("loop has unexpected successor, legalization failed");
|
||||||
"loop has unexpected predecessor, legalization failed");
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Set an attribute for indicating the scheduled dataflow level.
|
// Set an attribute for indicating the scheduled dataflow level.
|
||||||
loop.setAttr("dataflow_level", builder.getIntegerAttr(builder.getI64Type(),
|
loop.setAttr(
|
||||||
dataflowLevel + 1));
|
"dataflow_level",
|
||||||
|
builder.getIntegerAttr(builder.getI64Type(), dataflowLevel + 1));
|
||||||
|
|
||||||
// Eliminate bypass paths.
|
// Eliminate bypass paths.
|
||||||
for (auto mem : loadMemsMap[loop]) {
|
for (auto pair : successorsMap[loop]) {
|
||||||
for (auto predLoop : memStoresMap[mem]) {
|
auto mem = pair.first;
|
||||||
if (predLoop == loop)
|
auto successor = pair.second;
|
||||||
continue;
|
auto successorDataflowLevel =
|
||||||
|
successor->getAttrOfType<IntegerAttr>("dataflow_level").getInt();
|
||||||
|
|
||||||
auto predDataflowLevel =
|
// Insert CopyOps if required.
|
||||||
predLoop->getAttrOfType<IntegerAttr>("dataflow_level").getInt();
|
SmallVector<Value, 4> mems;
|
||||||
|
mems.push_back(mem);
|
||||||
|
builder.setInsertionPoint(successor);
|
||||||
|
|
||||||
// Insert dummy CopyOps if required.
|
for (auto i = dataflowLevel; i > successorDataflowLevel; --i) {
|
||||||
SmallVector<Operation *, 4> dummyOps;
|
|
||||||
dummyOps.push_back(loop);
|
|
||||||
for (auto i = dataflowLevel; i > predDataflowLevel; --i) {
|
|
||||||
// Create CopyOp.
|
// Create CopyOp.
|
||||||
builder.setInsertionPoint(dummyOps.back());
|
auto newMem = builder.create<mlir::AllocOp>(
|
||||||
auto interMem = builder.create<mlir::AllocOp>(
|
|
||||||
loop.getLoc(), mem.getType().cast<MemRefType>());
|
loop.getLoc(), mem.getType().cast<MemRefType>());
|
||||||
auto dummyOp =
|
auto copyOp = builder.create<linalg::CopyOp>(loop.getLoc(),
|
||||||
builder.create<linalg::CopyOp>(loop.getLoc(), mem, interMem);
|
mems.back(), newMem);
|
||||||
dummyOp.setAttr("dataflow_level",
|
|
||||||
builder.getIntegerAttr(builder.getI64Type(), i));
|
// Set CopyOp dataflow level.
|
||||||
|
copyOp.setAttr("dataflow_level",
|
||||||
|
builder.getIntegerAttr(builder.getI64Type(), i));
|
||||||
|
|
||||||
// Chain created CopyOps.
|
// Chain created CopyOps.
|
||||||
if (i == dataflowLevel) {
|
if (i == successorDataflowLevel + 1)
|
||||||
loop.walk([&](Operation *op) {
|
mem.replaceUsesWithIf(newMem, [&](mlir::OpOperand &use) {
|
||||||
if (auto affineLoad = dyn_cast<mlir::AffineLoadOp>(op)) {
|
return successor->isProperAncestor(use.getOwner());
|
||||||
if (affineLoad.getMemRef() == mem)
|
|
||||||
affineLoad.setMemRef(interMem);
|
|
||||||
|
|
||||||
} else if (auto load = dyn_cast<mlir::LoadOp>(op)) {
|
|
||||||
if (load.getMemRef() == mem)
|
|
||||||
load.setMemRef(interMem);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
} else
|
else
|
||||||
dummyOps.back()->setOperand(0, interMem);
|
mems.push_back(newMem);
|
||||||
|
|
||||||
dummyOps.push_back(dummyOp);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -107,7 +107,7 @@ void SplitFunction::runOnOperation() {
|
||||||
for (unsigned i = 0, e = inputValues.size(); i < e; ++i)
|
for (unsigned i = 0, e = inputValues.size(); i < e; ++i)
|
||||||
inputValues[i].replaceUsesWithIf(
|
inputValues[i].replaceUsesWithIf(
|
||||||
entry->getArgument(i), [&](mlir::OpOperand &use) {
|
entry->getArgument(i), [&](mlir::OpOperand &use) {
|
||||||
return getSameLevelDstOp(returnOp, use.getOwner());
|
return func.getOperation()->isProperAncestor(use.getOwner());
|
||||||
});
|
});
|
||||||
opIndex += 1;
|
opIndex += 1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
func @test_integer_compare(%arg0: i32, %arg1: i32) -> i1 {
|
func @test_integer_compare(%arg0: i32, %arg1: i32) -> i1 {
|
||||||
|
|
||||||
// CHECK: ap_int<1> [[VAL_0:.*]] = [[ARG_0:.*]] == [[ARG_1:.*]];
|
// CHECK: bool [[VAL_0:.*]] = [[ARG_0:.*]] == [[ARG_1:.*]];
|
||||||
%0 = cmpi "eq", %arg0, %arg1 : i32
|
%0 = cmpi "eq", %arg0, %arg1 : i32
|
||||||
// CHECK: !=
|
// CHECK: !=
|
||||||
%1 = cmpi "ne", %arg0, %arg1 : i32
|
%1 = cmpi "ne", %arg0, %arg1 : i32
|
||||||
|
@ -27,7 +27,7 @@ func @test_integer_compare(%arg0: i32, %arg1: i32) -> i1 {
|
||||||
|
|
||||||
func @test_float_compare(%arg0: f32, %arg1: f32) -> i1 {
|
func @test_float_compare(%arg0: f32, %arg1: f32) -> i1 {
|
||||||
|
|
||||||
// CHECK: ap_int<1> [[VAL_0:.*]] = [[ARG_0:.*]] == [[ARG_1:.*]];
|
// CHECK: bool [[VAL_0:.*]] = [[ARG_0:.*]] == [[ARG_1:.*]];
|
||||||
%0 = cmpf "oeq", %arg0, %arg1 : f32
|
%0 = cmpf "oeq", %arg0, %arg1 : f32
|
||||||
// CHECK: ==
|
// CHECK: ==
|
||||||
%1 = cmpf "ueq", %arg0, %arg1 : f32
|
%1 = cmpf "ueq", %arg0, %arg1 : f32
|
||||||
|
|
|
@ -8,7 +8,7 @@ func @test_constant(%arg0: i32) -> (i32, tensor<2x2xi32>, vector<2xi32>, i32) {
|
||||||
// CHECK: float [[VAL_1:.*]][2][2] = {1.100000e+01, 0.000000e+00, 0.000000e+00, -4.200000e+01};
|
// CHECK: float [[VAL_1:.*]][2][2] = {1.100000e+01, 0.000000e+00, 0.000000e+00, -4.200000e+01};
|
||||||
%1 = constant dense<[[11.0, 0.0], [0.0, -42.0]]> : tensor<2x2xf32>
|
%1 = constant dense<[[11.0, 0.0], [0.0, -42.0]]> : tensor<2x2xf32>
|
||||||
|
|
||||||
// CHECK: ap_int<1> [[VAL_2:.*]][2][2] = {1, 0, 0, 1};
|
// CHECK: bool [[VAL_2:.*]][2][2] = {1, 0, 0, 1};
|
||||||
%2 = constant dense<[[1, 0], [0, 1]]> : tensor<2x2xi1>
|
%2 = constant dense<[[1, 0], [0, 1]]> : tensor<2x2xi1>
|
||||||
|
|
||||||
// CHECK: ap_int<32> [[VAL_3:.*]][2] = {0, -42};
|
// CHECK: ap_int<32> [[VAL_3:.*]][2] = {0, -42};
|
||||||
|
@ -17,7 +17,7 @@ func @test_constant(%arg0: i32) -> (i32, tensor<2x2xi32>, vector<2xi32>, i32) {
|
||||||
// CHECK: float [[VAL_4:.*]][2] = {0.000000e+00, -4.200000e+01};
|
// CHECK: float [[VAL_4:.*]][2] = {0.000000e+00, -4.200000e+01};
|
||||||
%4 = constant dense<[0.0, -42.0]> : vector<2xf32>
|
%4 = constant dense<[0.0, -42.0]> : vector<2xf32>
|
||||||
|
|
||||||
// CHECK: ap_int<1> [[VAL_5:.*]][2] = {0, 1};
|
// CHECK: bool [[VAL_5:.*]][2] = {0, 1};
|
||||||
%5 = constant dense<[0, 1]> : vector<2xi1>
|
%5 = constant dense<[0, 1]> : vector<2xi1>
|
||||||
|
|
||||||
// CHECK: *[[ARG_1:.*]] = 11 + [[ARG_0:.*]];
|
// CHECK: *[[ARG_1:.*]] = 11 + [[ARG_0:.*]];
|
||||||
|
|
|
@ -5,7 +5,7 @@ func @test_scf_if(%arg0: index, %arg1: memref<16xindex>) {
|
||||||
%c0 = constant 0 : index
|
%c0 = constant 0 : index
|
||||||
|
|
||||||
// CHECK: int val2 = val0 + 11;
|
// CHECK: int val2 = val0 + 11;
|
||||||
// CHECK: ap_int<1> val3 = val2 > 0;
|
// CHECK: bool val3 = val2 > 0;
|
||||||
// CHECK: int val4;
|
// CHECK: int val4;
|
||||||
// CHECK: int val5[16];
|
// CHECK: int val5[16];
|
||||||
// CHECK: if (val3) {
|
// CHECK: if (val3) {
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
// CHECK: float [[VAL_0:.*]],
|
// CHECK: float [[VAL_0:.*]],
|
||||||
// CHECK: double [[VAL_1:.*]],
|
// CHECK: double [[VAL_1:.*]],
|
||||||
// CHECK: int [[VAL_2:.*]],
|
// CHECK: int [[VAL_2:.*]],
|
||||||
// CHECK: ap_int<1> [[VAL_3:.*]],
|
// CHECK: bool [[VAL_3:.*]],
|
||||||
// CHECK: ap_int<11> [[VAL_4:.*]],
|
// CHECK: ap_int<11> [[VAL_4:.*]],
|
||||||
// CHECK: ap_int<32> [[VAL_5:.*]],
|
// CHECK: ap_int<32> [[VAL_5:.*]],
|
||||||
// CHECK: ap_uint<32> [[VAL_6:.*]],
|
// CHECK: ap_uint<32> [[VAL_6:.*]],
|
||||||
|
@ -17,7 +17,7 @@
|
||||||
// CHECK: float *[[VAL_13:.*]],
|
// CHECK: float *[[VAL_13:.*]],
|
||||||
// CHECK: double *[[VAL_14:.*]],
|
// CHECK: double *[[VAL_14:.*]],
|
||||||
// CHECK: int *[[VAL_15:.*]],
|
// CHECK: int *[[VAL_15:.*]],
|
||||||
// CHECK: ap_int<1> *[[VAL_16:.*]],
|
// CHECK: bool *[[VAL_16:.*]],
|
||||||
// CHECK: ap_int<11> *[[VAL_17:.*]],
|
// CHECK: ap_int<11> *[[VAL_17:.*]],
|
||||||
// CHECK: ap_int<32> *[[VAL_18:.*]],
|
// CHECK: ap_int<32> *[[VAL_18:.*]],
|
||||||
// CHECK: ap_uint<32> *[[VAL_19:.*]],
|
// CHECK: ap_uint<32> *[[VAL_19:.*]],
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue