Resurrected memref-fullrank from #49 (#86)

* Fixed linkage issue due to incorrect macro defs

* Re-implemented memref-fullrank on the latest main

* Initial local static variable support

* Actually git add test

* Disable inliner for -O0 (#90)

* Disabled inliner when -O0 is present

* Added tests for no inline

* Added test case for -O1

* Checked -memref-fullrank in existing tests

Co-authored-by: William S. Moses <gh@wsmoses.com>
This commit is contained in:
Ruizhe Zhao 2021-10-15 17:10:07 +01:00 committed by GitHub
parent c0fd591a92
commit cadc844ece
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 194 additions and 6 deletions

View File

@ -42,6 +42,67 @@ using namespace mlir;
#define DEBUG_TYPE "clang-mlir"
/// Command-line flag `-memref-fullrank` (off by default). When set,
/// getMLIRType keeps every dimension of a clang-adjusted constant-array type
/// (e.g. an `int a[30][40]` parameter) in the resulting memref, giving
/// memref<30x40xi32> instead of the default memref<?x40xi32> whose outermost
/// dimension is erased.
static cl::opt<bool>
memRefFullRank("memref-fullrank", cl::init(false),
cl::desc("Get the full rank of the memref."));
/// Attempt to reconcile a MemRef-typed caller argument with the type the
/// callee declares for the matching parameter.
///
/// A `memref.cast` is emitted only when both types are MemRefs with the same
/// element type and the same non-zero rank, the caller's leading dimension is
/// dynamic (-1), and every remaining dimension agrees. The cast is created at
/// the point chosen by `setInsertionPointAfterValue(callerArg)`; the builder's
/// previous insertion point is restored on return.
///
/// \returns the cast result, or \p callerArg unchanged when the rule above
/// does not apply.
static mlir::Value castCallerMemRefArg(mlir::Value callerArg,
                                       mlir::Type calleeArgType,
                                       mlir::OpBuilder &b) {
  // Restores the builder's insertion point when this function exits.
  mlir::OpBuilder::InsertionGuard restoreInsertionPoint(b);

  mlir::Type callerArgType = callerArg.getType();

  // The callee side must be a MemRef (a null type is tolerated here).
  MemRefType calleeMemRefTy = calleeArgType.dyn_cast_or_null<MemRefType>();
  if (!calleeMemRefTy)
    return callerArg;

  // The caller side must be a MemRef with an identical element type.
  MemRefType callerMemRefTy = callerArgType.dyn_cast<MemRefType>();
  if (!callerMemRefTy ||
      calleeMemRefTy.getElementType() != callerMemRefTy.getElementType())
    return callerArg;

  auto callerDims = callerMemRefTy.getShape();
  auto calleeDims = calleeMemRefTy.getShape();

  // Same rank, at least one dimension, and a dynamic leading dimension on
  // the caller side.
  if (callerDims.size() != calleeDims.size() || callerDims.empty() ||
      callerDims[0] != -1)
    return callerArg;

  // Everything past the leading dimension has to match exactly.
  if (!(callerDims.drop_front() == calleeDims.drop_front()))
    return callerArg;

  b.setInsertionPointAfterValue(callerArg);
  return b.create<mlir::memref::CastOp>(callerArg.getLoc(), callerArg,
                                        calleeArgType);
}
/// Adjust the caller-side arguments in place so that they line up with the
/// callee's declared parameter types where a known rule allows it.
///
/// Only arguments whose callee-side type is a MemRef and differs from the
/// caller-side type are handed to castCallerMemRefArg. Any mismatch that no
/// rule covers (e.g. memref<10xi8> expected but memref<20xf32> supplied) is
/// deliberately left untouched and is not reported here; later stages of the
/// pipeline are expected to detect it.
///
/// \param callee Function being called; supplies the expected input types.
/// \param args   Caller arguments, rewritten in place when a cast is made.
/// \param b      Builder used to create any cast operations.
static void castCallerArgs(mlir::FuncOp callee,
                           llvm::SmallVectorImpl<mlir::Value> &args,
                           mlir::OpBuilder &b) {
  mlir::FunctionType calleeSignature =
      callee.getType().cast<mlir::FunctionType>();
  assert(args.size() == calleeSignature.getNumInputs() &&
         "The caller arguments should have the same size as the number of "
         "callee arguments as the interface.");

  for (unsigned argIdx = 0; argIdx < args.size(); ++argIdx) {
    mlir::Type expectedTy = calleeSignature.getInput(argIdx);
    mlir::Type providedTy = args[argIdx].getType();

    // Only mismatching arguments whose expected type is a MemRef are handled.
    if (expectedTy != providedTy && expectedTy.isa<MemRefType>())
      args[argIdx] = castCallerMemRefArg(args[argIdx], expectedTy, b);
  }
}
class IfScope {
public:
MLIRScanner &scanner;
@ -3305,6 +3366,9 @@ ValueCategory MLIRScanner::VisitCallExpr(clang::CallExpr *expr) {
return nullptr;
}
// Try to rescue some mismatched types.
castCallerArgs(tocall, args, builder);
auto op = builder.create<mlir::CallOp>(loc, tocall, args);
for (auto pair : toRestore) {
pair.second.store(builder, pair.first, /*isArray*/ true);
@ -5854,6 +5918,22 @@ mlir::Location MLIRASTConsumer::getMLIRLocation(clang::SourceLocation loc) {
return FileLineColLoc::get(ctx, fileId, lineNumber, colNumber);
}
/// Collect the extent of every dimension of a (possibly nested) constant
/// array type, outermost first, and report the innermost element type.
///
/// \param ty     Type to unwrap, e.g. the type of `int A[30][20]`.
/// \param shape  Cleared, then filled with one entry per array dimension
///               (`{30, 20}` for the example above). Left empty when \p ty is
///               not a constant array type.
/// \param elemTy Set to the first type reached that is not a constant array
///               (\p ty itself when it is not a constant array type).
static void getConstantArrayShapeAndElemType(const clang::QualType &ty,
                                             SmallVectorImpl<int64_t> &shape,
                                             clang::QualType &elemTy) {
  shape.clear();
  clang::QualType curTy = ty;
  while (curTy->isConstantArrayType()) {
    // isConstantArrayType() inspects the canonical type, so it also accepts
    // sugared arrays (typedefs, parenthesised types, ...). A plain cast<> on
    // the QualType looks at the sugared node and would assert on those, so
    // desugar to the underlying array type before casting.
    const auto *cstArrTy =
        cast<clang::ConstantArrayType>(curTy->getAsArrayTypeUnsafe());
    // Array extents are unsigned; zero-extend rather than sign-extend so a
    // large extent is never turned into a negative value.
    shape.push_back(static_cast<int64_t>(cstArrTy->getSize().getZExtValue()));
    curTy = cstArrTy->getElementType();
  }
  elemTy = curTy;
}
mlir::Type MLIRASTConsumer::getMLIRType(clang::QualType qt, bool *implicitRef,
bool allowMerge) {
if (auto ET = dyn_cast<clang::ElaboratedType>(qt)) {
@ -5879,6 +5959,24 @@ mlir::Type MLIRASTConsumer::getMLIRType(clang::QualType qt, bool *implicitRef,
bool assumeRef = false;
auto mlirty = getMLIRType(DT->getOriginalType(), &assumeRef, allowMerge);
if (assumeRef) {
// Constant array types like `int A[30][20]` will be converted to LLVM
// type `[20 x i32]* %0`, which has the outermost dimension size erased,
// and we can only recover to `memref<?x20xi32>` from there. This prevents
// us from doing more comprehensive analysis. Here we specifically handle
// this case by unwrapping the clang-adjusted type, to get the
// corresponding ConstantArrayType with the full dimensions.
if (memRefFullRank) {
clang::QualType origTy = DT->getOriginalType();
if (origTy->isConstantArrayType()) {
SmallVector<int64_t, 4> shape;
clang::QualType elemTy;
getConstantArrayShapeAndElemType(origTy, shape, elemTy);
return mlir::MemRefType::get(shape, getMLIRType(elemTy));
}
}
// If -memref-fullrank is unset or it cannot be fulfilled.
auto mt = mlirty.dyn_cast<MemRefType>();
auto shape2 = std::vector<int64_t>(mt.getShape());
shape2[0] = -1;

View File

@ -1,15 +1,15 @@
// RUN: mlir-clang %s --function=* -S | FileCheck %s
// RUN: mlir-clang %s --function=* -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
void sub0(int a[2]);
void sub(int a[2]) {
a[2]++;
}
void sub(int a[2]) { a[2]++; }
void kernel_deriche() {
int a[2];
sub0(a);
}
// FULLRANK: @sub(%arg0: memref<2xi32>)
// CHECK: @sub(%arg0: memref<?xi32>)
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: %0 = affine.load %arg0[2] : memref<?xi32>
@ -24,4 +24,3 @@ void kernel_deriche() {
// CHECK-NEXT: call @sub0(%1) : (memref<?xi32>) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s %stdinclude -S | FileCheck %s
// RUN: mlir-clang %s %stdinclude -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#include <stdio.h>
#include <unistd.h>
@ -37,3 +38,6 @@ int main()
// CHECK-NEXT: call @init_array(%1) : (memref<?xi32>) -> ()
// CHECK-NEXT: return %c0_i32 : i32
// CHECK-NEXT: }
// FULLRANK: %[[MEM:.*]] = memref.alloc() : memref<2800xi32>
// FULLRANK: call @init_array(%[[MEM]]) : (memref<2800xi32>) -> ()

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s %stdinclude -S | FileCheck %s
// RUN: mlir-clang %s %stdinclude -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#include <stdio.h>
#include <unistd.h>
@ -38,3 +39,6 @@ int main()
// CHECK-NEXT: memref.dealloc %0 : memref<2800xi32>
// CHECK-NEXT: return %c0_i32 : i32
// CHECK-NEXT: }
// FULLRANK: %[[MEM:.*]] = memref.alloc() : memref<2800xi32>
// FULLRANK: call @init_array(%[[MEM]]) : (memref<2800xi32>) -> ()

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_deriche -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_deriche -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
int kernel_deriche(int a[30][40]) {
a[3][5]++;
@ -13,3 +14,5 @@ int kernel_deriche(int a[30][40]) {
// CHECK-NEXT: %2 = affine.load %arg0[1, 2] : memref<?x40xi32>
// CHECK-NEXT: return %2 : i32
// CHECK-NEXT: }
// FULLRANK: func @kernel_deriche(%arg0: memref<30x40xi32>) -> i32

View File

@ -0,0 +1,33 @@
// RUN: mlir-clang %s -S -memref-fullrank -O0 | FileCheck %s
#include <stdio.h>
// Returns the sum of all 10x20 elements of A. The loop nest is wrapped in
// `#pragma scop` / `#pragma endscop` markers.
int f(int A[10][20]) {
int i, j, sum = 0;
#pragma scop
for (i = 0; i < 10; i++)
for (j = 0; j < 20; j++)
sum += A[i][j];
#pragma endscop
return sum;
}
// Forwards A to f, prints the returned sum, and always returns 0. This adds
// a second call level through which the 10x20 array type is passed.
int g(int A[10][20]) {
int c = f(A);
printf("%d\n", c);
return 0;
}
// Allocates a local 10x20 array (left uninitialised) and passes it to g, so
// the caller side also has a fully static array type.
int main() {
int A[10][20];
return g(A);
}
// CHECK: func @main()
// CHECK: %[[VAL0:.*]] = memref.alloca() : memref<10x20xi32>
// CHECK: %{{.*}} = call @g(%[[VAL0]]) : (memref<10x20xi32>) -> i32
// CHECK: func @g(%arg0: memref<10x20xi32>) -> i32
// CHECK: func @f(%arg0: memref<10x20xi32>) -> i32

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_deriche -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_deriche -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
int kernel_deriche(int a[30]) {
a[0]++;
@ -13,3 +14,5 @@ int kernel_deriche(int a[30]) {
// CHECK-NEXT: %2 = affine.load %arg0[1] : memref<?xi32>
// CHECK-NEXT: return %2 : i32
// CHECK-NEXT: }
// FULLRANK: func @kernel_deriche(%arg0: memref<30xi32>) -> i32

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s %stdinclude --function=init_array -S | FileCheck %s
// RUN: mlir-clang %s %stdinclude --function=init_array -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#include <stdio.h>
#include <unistd.h>
@ -63,3 +64,5 @@ void init_array (int path[10][10])
// CHECK-NEXT: return
// CHECK-NEXT: }
// CHECK-NEXT: }
// FULLRANK: func @init_array(%{{.*}}: memref<10x10xi32>)

View File

@ -1,4 +1,6 @@
// RUN: mlir-clang %s --function=kernel_nussinov -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_nussinov -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define N 5500
#define max_score(s1, s2) ((s1 >= s2) ? s1 : s2)
@ -15,6 +17,8 @@
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: @kernel_nussinov(%{{.*}}: i32, %{{.*}}: memref<5500xi32>)
void kernel_nussinov(int n, int table[N])
{
int j;

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s %stdinclude --function=init_array -S | FileCheck %s
// RUN: mlir-clang %s %stdinclude --function=init_array -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#include <stdio.h>
#include <unistd.h>
@ -26,3 +27,6 @@ void init_array (int n)
// CHECK-NEXT: call @use(%1) : (memref<?xf64>) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: %[[VAL0:.*]] = memref.alloc() : memref<20xf64>
// FULLRANK: call @use(%[[VAL0]]) : (memref<20xf64>) -> ()

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_correlation -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_correlation -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define DATA_TYPE double
@ -38,3 +39,5 @@ void kernel_correlation(double out[28], double stddev[28], _Bool cmp)
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @kernel_correlation(%{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>, %{{.*}}: i1)

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define DATA_TYPE double
@ -21,3 +22,5 @@ void kernel_correlation(int m, double corr[28])
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @kernel_correlation(%{{.*}}: i32, %{{.*}}: memref<28xf64>)

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define DATA_TYPE double
@ -29,3 +30,5 @@ void kernel_correlation(int m, double corr[28][28])
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @kernel_correlation(%{{.*}}: i32, %{{.*}}: memref<28x28xf64>)

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define DATA_TYPE double
@ -29,3 +30,5 @@ void kernel_correlation(double A[28], double B[28])
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @kernel_correlation(%{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>)

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define DATA_TYPE double
@ -58,3 +59,5 @@ void kernel_correlation(int n, double alpha, double beta,
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @kernel_correlation(%{{.*}}: i32, %{{.*}}: f64, %{{.*}}: f64, %{{.*}}: memref<28x28xf64>, %{{.*}}: memref<28x28xf64>, %{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>)

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s -detect-reduction --function=kernel_nussinov -S | FileCheck %s
// RUN: mlir-clang %s -detect-reduction --function=kernel_nussinov -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define max_score(s1, s2) ((s1 >= s2) ? s1 : s2)
@ -30,3 +31,7 @@ void kernel_nussinov(double* out, int n) {
// CHECK-NEXT: affine.store %4, %arg0[symbol(%[[i1]])] : memref<?xf64>
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @kernel_nussinov(%{{.*}}: memref<?xf64>, %{{.*}}: i32)
// FULLRANK: %[[i0:.+]] = memref.alloca() : memref<20xf64>
// FULLRANK: call @set(%[[i0]]) : (memref<20xf64>) -> ()

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#define DATA_TYPE double
@ -30,3 +31,5 @@ void kernel_correlation(int table[N][N]) {
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @kernel_correlation(%{{.*}}: memref<10x10xi32>)

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s %stdinclude --function=set -S | FileCheck %s
// RUN: mlir-clang %s %stdinclude --function=set -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
#include <stdio.h>
#include <unistd.h>
@ -38,3 +39,5 @@ void set (int path[20])
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: func @set(%{{.*}}: memref<20xi32>)

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s --function=whiletofor -S | FileCheck %s
// RUN: mlir-clang %s --function=whiletofor -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
void use(int a[100][100]);
@ -51,3 +52,6 @@ void whiletofor() {
// CHECK-NEXT: call @use(%2) : (memref<?x100xi32>) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
// FULLRANK: %[[VAL0:.*]] = memref.alloca() : memref<100x100xi32>
// FULLRANK: call @use(%[[VAL0]]) : (memref<100x100xi32>) -> ()

View File

@ -1,4 +1,5 @@
// RUN: mlir-clang %s %stdinclude -S | FileCheck %s
// RUN: mlir-clang %s %stdinclude -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
// RUN: clang %s -O3 %stdinclude %polyverify -o %s.exec1 && %s.exec1 &> %s.out1
// RUN: mlir-clang %s %polyverify %stdinclude -O3 -o %s.execm && %s.execm &> %s.out2
// RUN: rm -f %s.exec1 %s.execm
@ -160,6 +161,8 @@ int main(int argc, char** argv)
return 0;
}
// FULLRANK: func @kernel_gemm(%{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: f64, %{{.*}}: f64, %{{.*}}: memref<1000x1100xf64>, %{{.*}}: memref<1000x1200xf64>, %{{.*}}: memref<1200x1100xf64>)
// CHECK: func @kernel_gemm(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: f64, %arg4: f64, %arg5: memref<?x1100xf64>, %arg6: memref<?x1200xf64>, %arg7: memref<?x1100xf64>)
// CHECK-DAG: %[[i0:.+]] = index_cast %arg0 : i32 to index
// CHECK-DAG: %[[i1:.+]] = index_cast %arg1 : i32 to index