From f3c40918e3f42ef1a73f0614b7d79916f8a7de67 Mon Sep 17 00:00:00 2001
From: Hanchen Ye
Date: Mon, 5 Oct 2020 23:26:42 -0500
Subject: [PATCH] [HLSCppDialect] add array operations and corresponding
 modifications in HLSCppEmitter and ConvertToHLSCpp

---
 include/Analysis/QoREstimation.h           | 12 ++-
 include/Analysis/StaticParam.h             | 42 +++++++-
 include/Dialect/HLSCpp/PragmaOps.td        |  6 +-
 include/Dialect/HLSCpp/StructureOps.td     | 30 ++++++
 include/Visitor.h                          |  7 +-
 lib/Analysis/QoREstimation.cpp             | 14 +++
 .../ConvertToHLSCpp/ConvertToHLSCpp.cpp    | 94 ++++++++++++++-----
 lib/EmitHLSCpp/EmitHLSCpp.cpp              | 14 ++-
 test/Analysis/QoREstimation/test_for.mlir  |  4 -
 .../ConvertToHLSCpp/test_assign.mlir       | 13 ---
 .../ConvertToHLSCpp/test_conversion.mlir   | 15 +++
 11 files changed, 200 insertions(+), 51 deletions(-)
 delete mode 100644 test/Conversion/ConvertToHLSCpp/test_assign.mlir
 create mode 100644 test/Conversion/ConvertToHLSCpp/test_conversion.mlir

diff --git a/include/Analysis/QoREstimation.h b/include/Analysis/QoREstimation.h
index 3157fa0..0ea258a 100644
--- a/include/Analysis/QoREstimation.h
+++ b/include/Analysis/QoREstimation.h
@@ -49,9 +49,17 @@ public:
   explicit QoREstimator(ProcParam &procParam, MemParam &memParam,
                         std::string targetSpecPath, std::string opLatencyPath);

-  using ScheduleMap = llvm::SmallDenseMap;
+  // For storing the scheduled time stamp of each operation.
+  using ScheduleMap = llvm::SmallDenseMap;
+
+  // For storing each memory access operation indexed by its target memory
+  // value symbol.
   using MemAccess = std::pair;
-  using MemAccessList = SmallVector;
+  using MemAccessList = SmallVector;
+
+  // For storing the required memory ports for each partition of each array.
+  using MemPort = SmallVector;
+  using MemPortMap = llvm::SmallDenseMap;

   // This flag indicates that currently the estimator is in a pipelined region,
   // which will impact the estimation strategy.
diff --git a/include/Analysis/StaticParam.h b/include/Analysis/StaticParam.h
index 5938f6b..cc0f76a 100644
--- a/include/Analysis/StaticParam.h
+++ b/include/Analysis/StaticParam.h
@@ -85,7 +85,47 @@ enum class MemParamKind {

 /// This class includes all possible parameter kinds for "processes" (function,
 /// for/parallel loop, and if).
-class ProcParam : public ParamBase {};
+class ProcParam : public ParamBase {
+public:
+  // Process-related pragma configurations.
+  unsigned getEnablePipeline(Operation *op) {
+    return get(op, ProcParamKind::EnablePipeline);
+  }
+  unsigned getUnrollFactor(Operation *op) {
+    return get(op, ProcParamKind::UnrollFactor);
+  }
+
+  // Process attributes.
+  unsigned getLowerBound(Operation *op) {
+    return get(op, ProcParamKind::LowerBound);
+  }
+  unsigned getUpperBound(Operation *op) {
+    return get(op, ProcParamKind::UpperBound);
+  }
+  unsigned getIterNumber(Operation *op) {
+    return get(op, ProcParamKind::IterNumber);
+  }
+  unsigned getIsPerfect(Operation *op) {
+    return get(op, ProcParamKind::IsPerfect);
+  }
+
+  // Performance parameters.
+  unsigned getInitInterval(Operation *op) {
+    return get(op, ProcParamKind::InitInterval);
+  }
+  unsigned getIterLatency(Operation *op) {
+    return get(op, ProcParamKind::IterLatency);
+  }
+  unsigned getPipeIterNumber(Operation *op) {
+    return get(op, ProcParamKind::PipeIterNumber);
+  }
+  unsigned getLatency(Operation *op) { return get(op, ProcParamKind::Latency); }
+
+  // Resource parameters.
+  unsigned getLUT(Operation *op) { return get(op, ProcParamKind::LUT); }
+  unsigned getBRAM(Operation *op) { return get(op, ProcParamKind::BRAM); }
+  unsigned getDSP(Operation *op) { return get(op, ProcParamKind::DSP); }
+};

 /// This class includes all possible parameter kinds for memories (memref,
 /// tensor, and vector).
diff --git a/include/Dialect/HLSCpp/PragmaOps.td b/include/Dialect/HLSCpp/PragmaOps.td
index d5409e6..41feaf7 100644
--- a/include/Dialect/HLSCpp/PragmaOps.td
+++ b/include/Dialect/HLSCpp/PragmaOps.td
@@ -1,13 +1,11 @@
 //===-------------------------------------------------------*- tablegen -*-===//
-//
+// Deprecated
 //===----------------------------------------------------------------------===//

 #ifndef SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD
 #define SCALEHLS_DIALECT_HLSCPP_PRAGMAOPS_TD

-def ArrayPragmaOp : HLSCppOp<"array_pragma", [
-  PragmaOpInterface
-]> {
+def ArrayPragmaOp : HLSCppOp<"array_pragma", [PragmaOpInterface]> {
   let summary = "Apply array pragmas";
   let description = [{
     This hlscpp.array_pragma operation represents pragmas for arrays, such as
diff --git a/include/Dialect/HLSCpp/StructureOps.td b/include/Dialect/HLSCpp/StructureOps.td
index faaf16e..4c640c5 100644
--- a/include/Dialect/HLSCpp/StructureOps.td
+++ b/include/Dialect/HLSCpp/StructureOps.td
@@ -18,6 +18,36 @@ def AssignOp : HLSCppOp<"assign", [SameOperandsAndResultType]> {
   let results = (outs AnyType : $output);
 }

+def ArrayOp : HLSCppOp<"array", [SameOperandsAndResultType]> {
+  let summary = "A C++ array instance";
+  let description = [{
+    This hlscpp.array operation represents an array in C++. All shaped type
+    values (e.g., memref, tensor, and vector) should be passed through this
+    operation after being declared by an allocation (e.g., Alloc) operation or
+    in the signature of a function. This will help the compiler to easily
+    manage the attributes and statistics of arrays.
+  }];
+
+  let arguments = (ins Type : $input,
+
+    // Interface-related attributes.
+    OptionalAttr : $interface,
+    OptionalAttr : $interface_mode,
+    OptionalAttr : $interface_depth,
+
+    // BindStorage-related attributes.
+    OptionalAttr : $storage_type,
+    OptionalAttr : $storage_impl,
+
+    // ArrayPartition-related attributes.
+    OptionalAttr : $partition,
+    OptionalAttr : $partition_type,
+    OptionalAttr : $partition_factor
+  );
+
+  let results = (outs Type : $output);
+}
+
 def EndOp : HLSCppOp<"end", [Terminator]> {
   let summary = "Mark the end of an HLSCpp region";
   let description = [{
diff --git a/include/Visitor.h b/include/Visitor.h
index 955cb85..13a9e46 100644
--- a/include/Visitor.h
+++ b/include/Visitor.h
@@ -47,7 +47,9 @@ public:
           AddCFOp, SubCFOp, ImOp, ReOp, CreateComplexOp,
           // Special operations.
           SelectOp, ConstantOp, CopySignOp, TruncateIOp, ZeroExtendIOp,
-          SignExtendIOp, IndexCastOp, CallOp, ReturnOp, AssignOp, EndOp,
+          SignExtendIOp, IndexCastOp, CallOp, ReturnOp,
+          // Structure operations.
+          AssignOp, ArrayOp, EndOp,
           // Pragma operations.
           LoopPragmaOp, FuncPragmaOp, ArrayPragmaOp>(
           [&](auto opNode) -> ResultType {
@@ -171,7 +173,10 @@ public:
   HANDLE(IndexCastOp);
   HANDLE(CallOp);
   HANDLE(ReturnOp);
+
+  // Structure operations.
   HANDLE(AssignOp);
+  HANDLE(ArrayOp);
   HANDLE(EndOp);

   // Pragma operations.
diff --git a/lib/Analysis/QoREstimation.cpp b/lib/Analysis/QoREstimation.cpp
index c94e97b..b309c38 100644
--- a/lib/Analysis/QoREstimation.cpp
+++ b/lib/Analysis/QoREstimation.cpp
@@ -13,6 +13,10 @@ using namespace mlir;
 using namespace scalehls;
 using namespace hlscpp;

+//===----------------------------------------------------------------------===//
+// Utils
+//===----------------------------------------------------------------------===//
+
 //===----------------------------------------------------------------------===//
 // HLSCppAnalyzer Class Definition
 //===----------------------------------------------------------------------===//
@@ -252,6 +256,16 @@ bool QoREstimator::visitOp(AffineForOp op) {
   unsigned initInterval = 1;
   initInterval = getBlockII(body.front(), opScheduleMap, memLoadList,
                             memStoreList, initInterval);
+
+  // Calculate the initial interval caused by limited memory ports. For now,
+  // we only consider the memory accesses inside of the pipeline region; that
+  // is, the extra memory ports required by unroll optimizations outside of
+  // the pipeline region are not counted.
+  MemPortMap memLoadPortMap;
+  MemPortMap memStorePortMap;
+  for (auto &op : body.front()) {
+  }
+
   procParam.set(op, ProcParamKind::InitInterval, initInterval);
   procParam.set(op, ProcParamKind::Latency,
diff --git a/lib/Conversion/ConvertToHLSCpp/ConvertToHLSCpp.cpp b/lib/Conversion/ConvertToHLSCpp/ConvertToHLSCpp.cpp
index f213403..5bce506 100644
--- a/lib/Conversion/ConvertToHLSCpp/ConvertToHLSCpp.cpp
+++ b/lib/Conversion/ConvertToHLSCpp/ConvertToHLSCpp.cpp
@@ -15,35 +15,83 @@ namespace {
 class ConvertToHLSCppPass
     : public mlir::PassWrapper<ConvertToHLSCppPass, OperationPass<ModuleOp>> {
 public:
-  void runOnOperation() override {
-    for (auto &funcOp : getOperation()) {
-      if (auto func = dyn_cast<FuncOp>(funcOp)) {
-        if (func.getBlocks().size() != 1)
-          func.emitError("has zero or more than one basic block");
+  void runOnOperation() override;
+};
+} // namespace

-        if (auto returnOp = dyn_cast<ReturnOp>(func.front().getTerminator())) {
-          auto builder = OpBuilder(returnOp);
-          unsigned operandIdx = 0;
-          for (auto operand : returnOp.getOperands()) {
+void ConvertToHLSCppPass::runOnOperation() {
+  for (auto func : getOperation().getOps<FuncOp>()) {
+    if (func.getBlocks().size() != 1)
+      func.emitError("has zero or more than one basic block");

-            if (operand.getKind() == Value::Kind::BlockArgument) {
-              auto newValue = builder.create<AssignOp>(
-                  returnOp.getLoc(), operand.getType(), operand);
-              returnOp.setOperand(operandIdx, newValue);
-            } else if (isa<ConstantOp>(operand.getDefiningOp())) {
-              auto newValue = builder.create<AssignOp>(
-                  returnOp.getLoc(), operand.getType(), operand);
-              returnOp.setOperand(operandIdx, newValue);
-            }
-            operandIdx += 1;
+    auto b = OpBuilder(func);
+
+    // Insert AssignOp.
+    if (auto returnOp = dyn_cast<ReturnOp>(func.front().getTerminator())) {
+      b.setInsertionPoint(returnOp);
+      unsigned idx = 0;
+      for (auto operand : returnOp.getOperands()) {
+        if (operand.getKind() == Value::Kind::BlockArgument) {
+          auto value =
+              b.create<AssignOp>(returnOp.getLoc(), operand.getType(), operand);
+          returnOp.setOperand(idx, value);
+        } else if (isa<ConstantOp>(operand.getDefiningOp())) {
+          auto value =
+              b.create<AssignOp>(returnOp.getLoc(), operand.getType(), operand);
+          returnOp.setOperand(idx, value);
+        }
+        idx += 1;
+      }
+    } else
+      func.emitError("doesn't have a return operation as terminator.");
+
+    // Set function pragma attributes.
+    func.setAttr("dataflow", b.getBoolAttr(false));
+
+    for (auto &op : func.front()) {
+      if (auto forOp = dyn_cast<AffineForOp>(op)) {
+        if (forOp.getLoopBody().getBlocks().size() != 1)
+          forOp.emitError("has zero or more than one basic block");
+
+        // Set loop pragma attributes.
+        forOp.setAttr("pipeline", b.getBoolAttr(false));
+        forOp.setAttr("pipeline_II", b.getUI32IntegerAttr(1));
+        forOp.setAttr("unroll_factor", b.getUI32IntegerAttr(1));
+      }
+
+      for (auto operand : op.getOperands()) {
+        if (auto arrayType = operand.getType().dyn_cast<ShapedType>()) {
+          bool insertArrayOp = false;
+          if (operand.getKind() == Value::Kind::BlockArgument)
+            insertArrayOp = true;
+          else if (!isa<ArrayOp>(operand.getDefiningOp())) {
+            insertArrayOp = true;
+            if (!arrayType.hasStaticShape())
+              operand.getDefiningOp()->emitError(
+                  "is unranked or has a dynamic shape, which is illegal.");
           }
-        } else
-          func.emitError("doesn't have a return operation as terminator");
+
+          if (insertArrayOp) {
+            // Insert an array operation and set its attributes.
+            b.setInsertionPointAfterValue(operand);
+            auto arrayOp =
+                b.create<ArrayOp>(op.getLoc(), operand.getType(), operand);
+            operand.replaceAllUsesExcept(arrayOp.getResult(),
+                                         SmallPtrSet<Operation *, 1>{arrayOp});
+
+            // Set array pragma attributes; the default array instance is a
+            // ram_1p bram. Other attributes are not set here since they
+            // require more analysis to be determined.
+            arrayOp.setAttr("interface", b.getBoolAttr(false));
+            arrayOp.setAttr("storage_type", b.getStringAttr("ram_1p"));
+            arrayOp.setAttr("storage_impl", b.getStringAttr("bram"));
+            arrayOp.setAttr("partition", b.getBoolAttr(false));
+          }
+        }
       }
     }
   }
-};
-} // namespace
+}

 void hlscpp::registerConvertToHLSCppPass() {
   PassRegistration<ConvertToHLSCppPass>(
diff --git a/lib/EmitHLSCpp/EmitHLSCpp.cpp b/lib/EmitHLSCpp/EmitHLSCpp.cpp
index d2d572b..6a24cba 100644
--- a/lib/EmitHLSCpp/EmitHLSCpp.cpp
+++ b/lib/EmitHLSCpp/EmitHLSCpp.cpp
@@ -175,7 +175,10 @@ public:
   void emitConstant(ConstantOp *op);
   void emitIndexCast(IndexCastOp *op);
   void emitCall(CallOp *op);
+
+  /// Structure operation emitters.
   void emitAssign(AssignOp *op);
+  void emitArray(ArrayOp *op);

   /// Pragma operation emitters.
   void emitLoopPragma(LoopPragmaOp *op);
@@ -401,8 +404,6 @@ public:
   bool visitOp(IndexCastOp op) { return emitter.emitIndexCast(&op), true; }
   bool visitOp(CallOp op) { return emitter.emitCall(&op), true; }
   bool visitOp(ReturnOp op) { return true; }
-  bool visitOp(AssignOp op) { return emitter.emitAssign(&op), true; }
-  bool visitOp(EndOp op) { return true; }

 private:
   ModuleEmitter &emitter;
@@ -466,6 +467,11 @@ public:

   using HLSCppVisitorBase::visitOp;

+  /// Structure operations.
+  bool visitOp(AssignOp op) { return emitter.emitAssign(&op), true; }
+  bool visitOp(ArrayOp op) { return emitter.emitArray(&op), true; }
+  bool visitOp(EndOp op) { return true; }
+
   /// Pragma operations.
   bool visitOp(LoopPragmaOp op) { return emitter.emitLoopPragma(&op), true; }
   bool visitOp(FuncPragmaOp op) { return emitter.emitFuncPragma(&op), true; }
@@ -1013,6 +1019,7 @@ void ModuleEmitter::emitCall(CallOp *op) {
   // TODO
 }

+/// Structure operation emitters.
 void ModuleEmitter::emitAssign(AssignOp *op) {
   unsigned rank = emitNestedLoopHead(op->getResult());
   indent();
@@ -1023,8 +1030,9 @@ void ModuleEmitter::emitAssign(AssignOp *op) {
   emitNestedLoopTail(rank);
 }

-/// Pragma operation emitters.
+void ModuleEmitter::emitArray(ArrayOp *op) {}

+/// Pragma operation emitters.
 void ModuleEmitter::emitLoopPragma(LoopPragmaOp *op) {
   indent();
   os << "#pragma HLS unroll";
diff --git a/test/Analysis/QoREstimation/test_for.mlir b/test/Analysis/QoREstimation/test_for.mlir
index 1fbe53b..aaa8c10 100644
--- a/test/Analysis/QoREstimation/test_for.mlir
+++ b/test/Analysis/QoREstimation/test_for.mlir
@@ -2,15 +2,11 @@

 // CHECK-LABEL: func @test_for
 func @test_for(%arg0: memref<16x4x4xindex>, %arg1: memref<16x4x4xindex>) {
-  "hlscpp.func_pragma" () {dataflow = true} : () -> ()
   "hlscpp.array_pragma" (%arg0) {partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[4 : ui32, 2 : ui32, 4 : ui32], storage_type="ram_2p", interface=true, interface_mode="bram"} : (memref<16x4x4xindex>) -> ()
   "hlscpp.array_pragma" (%arg1) {partition=true, partition_type=["cyclic", "cyclic", "cyclic"], partition_factor=[4 : ui32, 2 : ui32, 4 : ui32], storage_type="ram_2p", interface=true, interface_mode="bram"} : (memref<16x4x4xindex>) -> ()
   affine.for %i = 0 to 16 {
-    "hlscpp.loop_pragma" () {unroll_factor=4 : ui32} : () -> ()
     affine.for %j = 0 to 4 {
-      "hlscpp.loop_pragma" () {pipeline=true, unroll_factor=2 : ui32} : () -> ()
       affine.for %k = 0 to 4 {
-        "hlscpp.loop_pragma" () {unroll_factor=4 : ui32} : () -> ()
         %0 = affine.load %arg0[%i, %j, %k] : memref<16x4x4xindex>
         %1 = affine.load %arg1[%i, %j, %k] : memref<16x4x4xindex>
         %2 = muli %0, %1 : index
diff --git a/test/Conversion/ConvertToHLSCpp/test_assign.mlir b/test/Conversion/ConvertToHLSCpp/test_assign.mlir
deleted file mode 100644
index 2acf21f..0000000
--- a/test/Conversion/ConvertToHLSCpp/test_assign.mlir
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: scalehls-opt -convert-to-hlscpp %s | FileCheck %s
-
-func @test_assign(%arg0: f32, %arg1: memref<16xf32>) -> (f32, memref<16xf32>, i32, tensor<2x2xi32>) {
-  %c11_i32 = constant 11 : i32
-  %cst = constant dense<[[11, 0], [0, -42]]> : tensor<2x2xi32>
-
-  // CHECK: %[[VAL_0:.*]] = "hlscpp.assign"(%[[ARG_0:.*]]) : (f32) -> f32
-  // CHECK: %[[VAL_1:.*]] = "hlscpp.assign"(%[[ARG_1:.*]]) : (memref<16xf32>) -> memref<16xf32>
-  // CHECK: %[[VAL_2:.*]] = "hlscpp.assign"(%c11_i32) : (i32) -> i32
-  // CHECK: %[[VAL_3:.*]] = "hlscpp.assign"(%cst) : (tensor<2x2xi32>) -> tensor<2x2xi32>
-  // CHECK: return %[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]] : f32, memref<16xf32>, i32, tensor<2x2xi32>
-  return %arg0, %arg1, %c11_i32, %cst : f32, memref<16xf32>, i32, tensor<2x2xi32>
-}
diff --git a/test/Conversion/ConvertToHLSCpp/test_conversion.mlir b/test/Conversion/ConvertToHLSCpp/test_conversion.mlir
new file mode 100644
index 0000000..d47b2ac
--- /dev/null
+++ b/test/Conversion/ConvertToHLSCpp/test_conversion.mlir
@@ -0,0 +1,15 @@
+// RUN: scalehls-opt -convert-to-hlscpp %s | FileCheck %s
+
+// CHECK-LABEL: func @test_conversion(
+// CHECK-SAME: %arg0: f32, %arg1: memref<16xf32>) -> (f32, memref<16xf32>, i32, tensor<2x2xi32>) attributes {dataflow = false} {
+func @test_conversion(%arg0: f32, %arg1: memref<16xf32>) -> (f32, memref<16xf32>, i32, tensor<2x2xi32>) {
+  // CHECK: %[[VAL_0:.*]] = "hlscpp.array"(%[[ARG_1:.*]]) {interface = false, partition = false, storage_impl = "bram", storage_type = "ram_1p"} : (memref<16xf32>) -> memref<16xf32>
+  %c11_i32 = constant 11 : i32
+  %cst = constant dense<[[11, 0], [0, -42]]> : tensor<2x2xi32>
+
+  // CHECK: %[[VAL_1:.*]] = "hlscpp.array"(%cst) {interface = false, partition = false, storage_impl = "bram", storage_type = "ram_1p"} : (tensor<2x2xi32>) -> tensor<2x2xi32>
+  // CHECK: %[[VAL_2:.*]] = "hlscpp.assign"(%[[ARG_0:.*]]) : (f32) -> f32
+  // CHECK: %[[VAL_3:.*]] = "hlscpp.assign"(%c11_i32) : (i32) -> i32
+  // CHECK: return %[[VAL_2:.*]], %[[VAL_0:.*]], %[[VAL_3:.*]], %[[VAL_1:.*]] : f32, memref<16xf32>, i32, tensor<2x2xi32>
+  return %arg0, %arg1, %c11_i32, %cst : f32, memref<16xf32>, i32, tensor<2x2xi32>
+}
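--

A note on the empty loop added to QoREstimator::visitOp(AffineForOp): it is a
placeholder for the port-limited initiation interval calculation that the new
MemPort/MemPortMap typedefs prepare for. The sketch below is not part of the
patch; it shows one common way to compute that bound, assuming MemPort holds a
per-partition access count. getPartitionIndex() and getPortNumber() are
hypothetical helpers that a real implementation would derive from the
hlscpp.array partition and storage attributes.

  // Count how many times each partition of each memory value is accessed
  // within one iteration of the pipelined loop body.
  MemPortMap memLoadPortMap;
  for (auto &op : body.front()) {
    if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
      auto &accessNums = memLoadPortMap[loadOp.getMemRef()];
      unsigned partition = getPartitionIndex(loadOp); // Hypothetical helper.
      if (accessNums.size() <= partition)
        accessNums.resize(partition + 1, 0);
      accessNums[partition] += 1;
    }
  }

  // A partition with P ports serves at most P accesses per cycle, so the
  // port-limited II is the worst-case ceil(accesses / ports) over all
  // partitions; a ram_1p partition exposes a single port.
  unsigned memII = 1;
  for (auto &memPair : memLoadPortMap)
    for (unsigned accessNum : memPair.second) {
      unsigned portNum = getPortNumber(memPair.first); // Hypothetical helper.
      memII = std::max(memII, (accessNum + portNum - 1) / portNum);
    }
  initInterval = std::max(initInterval, memII);

Store accesses would be tallied the same way through memStorePortMap, and with
a dual-port storage type such as ram_2p, the loads and stores of a partition
would share its port budget.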