[HLSKernelToAffine] impl of GemmOp and SymmOp lowering
This commit is contained in:
parent
29aff5e6fb
commit
ac1c9966a8
|
@ -40,7 +40,7 @@ def SymmOp : HLSKernelOp<"symm", [HLSKernelOpInterface]> {
|
||||||
C = alpha * B * A + beta * C,
|
C = alpha * B * A + beta * C,
|
||||||
A: N x N, symmetric,
|
A: N x N, symmetric,
|
||||||
|
|
||||||
UPLO (false / true): A is upper / lower triangular,
|
UPLO (false / true): A is lower / upper triangular,
|
||||||
|
|
||||||
B: M x N,
|
B: M x N,
|
||||||
C: M x N
|
C: M x N
|
||||||
|
@ -68,7 +68,7 @@ def SyrkOp : HLSKernelOp<"syrk", [HLSKernelOpInterface]> {
|
||||||
C = alpha * A^T * A + beta * C,
|
C = alpha * A^T * A + beta * C,
|
||||||
A: K x N,
|
A: K x N,
|
||||||
|
|
||||||
UPLO (false / true): C is upper / lower triangular,
|
UPLO (false / true): C is lower / upper triangular,
|
||||||
|
|
||||||
C: N x N, symmetric
|
C: N x N, symmetric
|
||||||
}];
|
}];
|
||||||
|
@ -96,7 +96,7 @@ def Syr2kOp : HLSKernelOp<"syr2k", [HLSKernelOpInterface]> {
|
||||||
A: K x N,
|
A: K x N,
|
||||||
B: K x N,
|
B: K x N,
|
||||||
|
|
||||||
UPLO (false / true): C is upper / lower triangular,
|
UPLO (false / true): C is lower / upper triangular,
|
||||||
|
|
||||||
C: N x N, symmetric
|
C: N x N, symmetric
|
||||||
}];
|
}];
|
||||||
|
@ -123,7 +123,7 @@ def TrmmOp : HLSKernelOp<"trmm", [HLSKernelOpInterface]> {
|
||||||
B = alpha * B * op(A),
|
B = alpha * B * op(A),
|
||||||
A: N x N, triangular,
|
A: N x N, triangular,
|
||||||
|
|
||||||
UPLO (false / true): A is upper / lower triangular,
|
UPLO (false / true): A is lower / upper triangular,
|
||||||
TRANSA (false / true): op(A) = A / op(A) = A^T,
|
TRANSA (false / true): op(A) = A / op(A) = A^T,
|
||||||
DIAG (false / true): A is non-unit / unit triangular,
|
DIAG (false / true): A is non-unit / unit triangular,
|
||||||
|
|
||||||
|
|
|
@ -44,13 +44,42 @@ private:
|
||||||
OpBuilder &builder;
|
OpBuilder &builder;
|
||||||
Location loc;
|
Location loc;
|
||||||
|
|
||||||
// Helpers for creating loops, loads, stores and binary operations.
|
// Helpers for creating loops.
|
||||||
Value createLoop(unsigned upper, unsigned step = 1, unsigned lower = 0) {
|
// Constant upper and lower bound.
|
||||||
|
// Builds an affine.for whose lower bound, upper bound and step are all
// compile-time constants. The builder's insertion point is moved to the start
// of the new loop body, so subsequently created operations are nested inside
// the loop. Returns the loop's induction variable.
Value createLoop(int64_t upper, int64_t lower = 0, int64_t step = 1) {
  auto forOp = builder.create<mlir::AffineForOp>(loc, lower, upper, step);
  auto &bodyBlock = forOp.getLoopBody().front();
  builder.setInsertionPointToStart(&bodyBlock);
  return forOp.getInductionVar();
}
|
||||||
|
|
||||||
|
// General case.
|
||||||
|
// Builds an affine.for whose bounds are affine maps applied to operand lists:
// `lowerMap(lower...)` is the lower bound and `upperMap(upper...)` the upper
// bound. The builder's insertion point is moved to the start of the new loop
// body. Returns the loop's induction variable.
Value createLoop(std::initializer_list<Value> upper, AffineMap upperMap,
                 std::initializer_list<Value> lower, AffineMap lowerMap,
                 int64_t step = 1) {
  // Note the argument order expected by the op builder: lower bound first.
  auto forOp = builder.create<mlir::AffineForOp>(loc, lower, lowerMap, upper,
                                                 upperMap, step);
  auto &bodyBlock = forOp.getLoopBody().front();
  builder.setInsertionPointToStart(&bodyBlock);
  return forOp.getInductionVar();
}
|
||||||
|
|
||||||
|
// Loop whose upper and lower bounds are both single SSA values. Each bound is
// passed through the same one-dimensional map built from getDim(0)
// (presumably the identity (d0) -> (d0); getDim is defined outside this view).
// Delegates to the general overload, which moves the insertion point into the
// loop body and returns the induction variable.
Value createLoop(Value upper, Value lower, int64_t step = 1) {
  auto indexMap = AffineMap::get(1, 0, getDim(0), builder.getContext());
  // Forward `step`; it was previously dropped, so the loop always used the
  // general overload's default step of 1 regardless of the caller's argument.
  return createLoop({upper}, indexMap, {lower}, indexMap, step);
}
|
||||||
|
|
||||||
|
// Loop with a constant upper bound and an SSA-value lower bound. The upper
// bound is a zero-operand map built from getConst(upper); the lower bound is a
// one-dimensional map built from getDim(0) applied to `lower` (getConst and
// getDim are defined outside this view). Delegates to the general overload,
// which moves the insertion point into the loop body and returns the
// induction variable.
Value createLoop(int64_t upper, Value lower, int64_t step = 1) {
  auto lowerMap = AffineMap::get(1, 0, getDim(0), builder.getContext());
  auto upperMap = AffineMap::get(0, 0, getConst(upper), builder.getContext());
  // Forward `step`; it was previously dropped, so the loop always used the
  // general overload's default step of 1 regardless of the caller's argument.
  return createLoop({}, upperMap, {lower}, lowerMap, step);
}
|
||||||
|
|
||||||
|
// Loop with an SSA-value upper bound and a constant lower bound. The lower
// bound is a zero-operand map built from getConst(lower); the upper bound is a
// one-dimensional map built from getDim(0) applied to `upper` (getConst and
// getDim are defined outside this view). Delegates to the general overload,
// which moves the insertion point into the loop body and returns the
// induction variable.
Value createLoop(Value upper, int64_t lower, int64_t step = 1) {
  auto lowerMap = AffineMap::get(0, 0, getConst(lower), builder.getContext());
  auto upperMap = AffineMap::get(1, 0, getDim(0), builder.getContext());
  // Forward `step`; it was previously dropped, so the loop always used the
  // general overload's default step of 1 regardless of the caller's argument.
  return createLoop({upper}, upperMap, {}, lowerMap, step);
}
|
||||||
|
|
||||||
|
// Helpers for creating loads, stores and binary operations.
|
||||||
Value createLoad(Value array, std::initializer_list<Value> index) {
|
Value createLoad(Value array, std::initializer_list<Value> index) {
|
||||||
return builder.create<mlir::AffineLoadOp>(loc, array,
|
return builder.create<mlir::AffineLoadOp>(loc, array,
|
||||||
ArrayRef<Value>(index));
|
ArrayRef<Value>(index));
|
||||||
|
@ -319,9 +348,100 @@ bool HLSKernelVisitor::visitOp(MergeOp op) {
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
// Only default attributes configuration are supported.
|
// Only the default attribute configuration is supported.
|
||||||
bool HLSKernelVisitor::visitOp(GemmOp op) { return true; }
|
// Lowers a GemmOp into an affine loop nest emitted immediately before `op`.
// Operands are read positionally as (alpha, beta, A, B, C). The generated
// code first scales C[m, n] by beta, then accumulates alpha * A[m, k] * B[k, n]
// into C[m, n] over k. Loop extents come from the static shapes of C (M, N)
// and the second dimension of A (K). Always returns true.
bool HLSKernelVisitor::visitOp(GemmOp op) {
  Value alpha = op.getOperand(0);
  Value beta = op.getOperand(1);

  Value matA = op.getOperand(2);
  Value matB = op.getOperand(3);
  Value matC = op.getOperand(4);

  auto shapeA = matA.getType().cast<MemRefType>().getShape();
  auto shapeC = matC.getType().cast<MemRefType>().getShape();

  // Everything below is emitted right before the kernel op.
  builder.setInsertionPoint(op);

  // Outer loops over the rows (M) and columns (N) of C. Each createLoop call
  // moves the insertion point into the body of the loop it just created.
  auto row = createLoop(shapeC[0]);
  auto col = createLoop(shapeC[1]);

  // C[row, col] = beta * C[row, col].
  auto oldC = createLoad(matC, {row, col});
  auto scaledC = createBinaryOp<mlir::MulFOp>(beta, oldC);
  createStore(scaledC, matC, {row, col});

  // Reduction loop over K, nested inside the N loop after the scaling store.
  auto red = createLoop(shapeA[1]);

  // C[row, col] += alpha * A[row, red] * B[red, col].
  auto elemA = createLoad(matA, {row, red});
  auto elemB = createLoad(matB, {red, col});
  auto elemC = createLoad(matC, {row, col});

  auto scaledA = createBinaryOp<mlir::MulFOp>(alpha, elemA);
  auto product = createBinaryOp<mlir::MulFOp>(scaledA, elemB);
  auto updatedC = createBinaryOp<mlir::AddFOp>(product, elemC);
  createStore(updatedC, matC, {row, col});

  return true;
}
|
||||||
|
|
||||||
|
// Lowers a SymmOp into an affine loop nest emitted immediately before `op`.
// Operands are read positionally as (alpha, beta, A, B, C). For every element
// C[m, n] the generated code scales it by beta and then runs two reduction
// loops that only ever index A at positions whose row is >= column:
//   k in [0, m):      C[m, n] += alpha * A[m, k] * B[k, n]
//   k in [m, dim0(C)): C[m, n] += alpha * A[k, m] * B[k, n]
// Always returns true.
bool HLSKernelVisitor::visitOp(SymmOp op) {
  Value alpha = op.getOperand(0);
  Value beta = op.getOperand(1);

  Value matA = op.getOperand(2);
  Value matB = op.getOperand(3);
  Value matC = op.getOperand(4);

  auto shapeC = matC.getType().cast<MemRefType>().getShape();

  // Everything below is emitted right before the kernel op.
  builder.setInsertionPoint(op);

  // Outer loops over the rows (M) and columns (N) of C. Each createLoop call
  // moves the insertion point into the body of the loop it just created.
  auto row = createLoop(shapeC[0]);
  auto col = createLoop(shapeC[1]);

  // C[row, col] = beta * C[row, col].
  auto oldC = createLoad(matC, {row, col});
  auto scaledC = createBinaryOp<mlir::MulFOp>(beta, oldC);
  createStore(scaledC, matC, {row, col});

  // Emits, at the current insertion point,
  //   C[row, col] += alpha * A[aRow, aCol] * B[bRow, col].
  auto emitAccumulate = [&](Value aRow, Value aCol, Value bRow) {
    auto elemA = createLoad(matA, {aRow, aCol});
    auto elemB = createLoad(matB, {bRow, col});
    auto elemC = createLoad(matC, {row, col});

    auto scaledA = createBinaryOp<mlir::MulFOp>(alpha, elemA);
    auto product = createBinaryOp<mlir::MulFOp>(scaledA, elemB);
    auto updatedC = createBinaryOp<mlir::AddFOp>(product, elemC);
    createStore(updatedC, matC, {row, col});
  };

  // First K loop, from 0 up to the current row index: reads A[row, k].
  auto lowK = createLoop(row, 0);
  emitAccumulate(row, lowK, lowK);

  // Second K loop, from the current row index up to dim 0 of C. Re-anchor the
  // builder before the terminator of the block owning the N induction
  // variable, so this loop is emitted after the first K loop rather than
  // nested inside it. It reads the mirrored element A[k, row].
  builder.setInsertionPoint(col.getParentBlock()->getTerminator());
  auto highK = createLoop(shapeC[0], row);
  emitAccumulate(highK, row, highK);

  return true;
}
|
||||||
|
|
||||||
bool HLSKernelVisitor::visitOp(SyrkOp op) { return true; }
|
bool HLSKernelVisitor::visitOp(SyrkOp op) { return true; }
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
// RUN: scalehls-opt -hlskernel-to-affine %s | FileCheck %s
|
||||||
|
|
||||||
|
// CHECK: module {
|
||||||
|
// Input for the -hlskernel-to-affine pass: a single hlskernel.gemm with
// operands (alpha, beta, A, B, C) where A is 32x16, B is 16x8 and C is 32x8,
// all f32.
// NOTE(review): the only CHECK line in this file matches `module {`, so the
// generated loop nest itself is not verified.
func @test_gemm(%A: memref<32x16xf32>, %B: memref<16x8xf32>, %C: memref<32x8xf32>) -> () {
|
||||||
|
%alpha = constant 11.0 : f32
|
||||||
|
%beta = constant 42.0 : f32
|
||||||
|
"hlskernel.gemm" (%alpha, %beta, %A, %B, %C) {} : (f32, f32, memref<32x16xf32>, memref<16x8xf32>, memref<32x8xf32>) -> ()
|
||||||
|
return
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
// RUN: scalehls-opt -hlskernel-to-affine %s | FileCheck %s
|
||||||
|
|
||||||
|
// CHECK: module {
|
||||||
|
// Input for the -hlskernel-to-affine pass: a single hlskernel.symm with
// operands (alpha, beta, A, B, C) where A is 32x32 and B, C are 32x8, all f32.
// NOTE(review): the only CHECK line in this file matches `module {`, so the
// generated loop nest itself is not verified.
func @test_symm(%A: memref<32x32xf32>, %B: memref<32x8xf32>, %C: memref<32x8xf32>) -> () {
|
||||||
|
%alpha = constant 11.0 : f32
|
||||||
|
%beta = constant 42.0 : f32
|
||||||
|
"hlskernel.symm" (%alpha, %beta, %A, %B, %C) {} : (f32, f32, memref<32x32xf32>, memref<32x8xf32>, memref<32x8xf32>) -> ()
|
||||||
|
return
|
||||||
|
}
|
Loading…
Reference in New Issue