* Fixed linkage issue due to incorrect macro defs * Re-implemented memref-fullrank on the latest main * Initial local static variable support * Actually git add test * Disable inliner for -O0 (#90) * Disabled inliner when -O0 is present * Added tests for no inline * Added test case for -O1 * Checked -memref-fullrank in existing tests Co-authored-by: William S. Moses <gh@wsmoses.com>
This commit is contained in:
parent
c0fd591a92
commit
cadc844ece
|
@ -42,6 +42,67 @@ using namespace mlir;
|
|||
|
||||
#define DEBUG_TYPE "clang-mlir"
|
||||
|
||||
static cl::opt<bool>
|
||||
memRefFullRank("memref-fullrank", cl::init(false),
|
||||
cl::desc("Get the full rank of the memref."));
|
||||
|
||||
/// Try to typecast the caller arg of type MemRef to fit the corresponding
|
||||
/// callee arg type. We only deal with the cast where src and dst have the same
|
||||
/// shape size and elem type, and just the first shape differs: src has -1 and
|
||||
/// dst has a constant integer.
|
||||
static mlir::Value castCallerMemRefArg(mlir::Value callerArg,
|
||||
mlir::Type calleeArgType,
|
||||
mlir::OpBuilder &b) {
|
||||
mlir::OpBuilder::InsertionGuard guard(b);
|
||||
mlir::Type callerArgType = callerArg.getType();
|
||||
|
||||
if (MemRefType dstTy = calleeArgType.dyn_cast_or_null<MemRefType>()) {
|
||||
MemRefType srcTy = callerArgType.dyn_cast<MemRefType>();
|
||||
if (srcTy && dstTy.getElementType() == srcTy.getElementType()) {
|
||||
auto srcShape = srcTy.getShape();
|
||||
auto dstShape = dstTy.getShape();
|
||||
|
||||
if (srcShape.size() == dstShape.size() && !srcShape.empty() &&
|
||||
srcShape[0] == -1 &&
|
||||
std::equal(std::next(srcShape.begin()), srcShape.end(),
|
||||
std::next(dstShape.begin()))) {
|
||||
b.setInsertionPointAfterValue(callerArg);
|
||||
|
||||
return b.create<mlir::memref::CastOp>(callerArg.getLoc(), callerArg,
|
||||
calleeArgType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return the original value when casting fails.
|
||||
return callerArg;
|
||||
}
|
||||
|
||||
/// Typecast the caller args to match the callee's signature. Mismatches that
|
||||
/// cannot be resolved by given rules won't raise exceptions, e.g., if the
|
||||
/// expected type for an arg is memref<10xi8> while the provided is
|
||||
/// memref<20xf32>, we will simply ignore the case in this function and wait for
|
||||
/// the rest of the pipeline to detect it.
|
||||
static void castCallerArgs(mlir::FuncOp callee,
|
||||
llvm::SmallVectorImpl<mlir::Value> &args,
|
||||
mlir::OpBuilder &b) {
|
||||
mlir::FunctionType funcTy = callee.getType().cast<mlir::FunctionType>();
|
||||
assert(args.size() == funcTy.getNumInputs() &&
|
||||
"The caller arguments should have the same size as the number of "
|
||||
"callee arguments as the interface.");
|
||||
|
||||
for (unsigned i = 0; i < args.size(); ++i) {
|
||||
mlir::Type calleeArgType = funcTy.getInput(i);
|
||||
mlir::Type callerArgType = args[i].getType();
|
||||
|
||||
if (calleeArgType == callerArgType)
|
||||
continue;
|
||||
|
||||
if (calleeArgType.isa<MemRefType>())
|
||||
args[i] = castCallerMemRefArg(args[i], calleeArgType, b);
|
||||
}
|
||||
}
|
||||
|
||||
class IfScope {
|
||||
public:
|
||||
MLIRScanner &scanner;
|
||||
|
@ -3305,6 +3366,9 @@ ValueCategory MLIRScanner::VisitCallExpr(clang::CallExpr *expr) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Try to rescue some mismatched types.
|
||||
castCallerArgs(tocall, args, builder);
|
||||
|
||||
auto op = builder.create<mlir::CallOp>(loc, tocall, args);
|
||||
for (auto pair : toRestore) {
|
||||
pair.second.store(builder, pair.first, /*isArray*/ true);
|
||||
|
@ -5854,6 +5918,22 @@ mlir::Location MLIRASTConsumer::getMLIRLocation(clang::SourceLocation loc) {
|
|||
return FileLineColLoc::get(ctx, fileId, lineNumber, colNumber);
|
||||
}
|
||||
|
||||
/// Iteratively get the size of each dim of the given ConstantArrayType inst.
|
||||
static void getConstantArrayShapeAndElemType(const clang::QualType &ty,
|
||||
SmallVectorImpl<int64_t> &shape,
|
||||
clang::QualType &elemTy) {
|
||||
shape.clear();
|
||||
|
||||
clang::QualType curTy = ty;
|
||||
while (curTy->isConstantArrayType()) {
|
||||
auto cstArrTy = cast<clang::ConstantArrayType>(curTy);
|
||||
shape.push_back(cstArrTy->getSize().getSExtValue());
|
||||
curTy = cstArrTy->getElementType();
|
||||
}
|
||||
|
||||
elemTy = curTy;
|
||||
}
|
||||
|
||||
mlir::Type MLIRASTConsumer::getMLIRType(clang::QualType qt, bool *implicitRef,
|
||||
bool allowMerge) {
|
||||
if (auto ET = dyn_cast<clang::ElaboratedType>(qt)) {
|
||||
|
@ -5879,6 +5959,24 @@ mlir::Type MLIRASTConsumer::getMLIRType(clang::QualType qt, bool *implicitRef,
|
|||
bool assumeRef = false;
|
||||
auto mlirty = getMLIRType(DT->getOriginalType(), &assumeRef, allowMerge);
|
||||
if (assumeRef) {
|
||||
// Constant array types like `int A[30][20]` will be converted to LLVM
|
||||
// type `[20 x i32]* %0`, which has the outermost dimension size erased,
|
||||
// and we can only recover to `memref<?x20xi32>` from there. This prevents
|
||||
// us from doing more comprehensive analysis. Here we specifically handle
|
||||
// this case by unwrapping the clang-adjusted type, to get the
|
||||
// corresponding ConstantArrayType with the full dimensions.
|
||||
if (memRefFullRank) {
|
||||
clang::QualType origTy = DT->getOriginalType();
|
||||
if (origTy->isConstantArrayType()) {
|
||||
SmallVector<int64_t, 4> shape;
|
||||
clang::QualType elemTy;
|
||||
getConstantArrayShapeAndElemType(origTy, shape, elemTy);
|
||||
|
||||
return mlir::MemRefType::get(shape, getMLIRType(elemTy));
|
||||
}
|
||||
}
|
||||
|
||||
// If -memref-fullrank is unset or it cannot be fulfilled.
|
||||
auto mt = mlirty.dyn_cast<MemRefType>();
|
||||
auto shape2 = std::vector<int64_t>(mt.getShape());
|
||||
shape2[0] = -1;
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
// RUN: mlir-clang %s --function=* -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=* -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
void sub0(int a[2]);
|
||||
void sub(int a[2]) {
|
||||
a[2]++;
|
||||
}
|
||||
void sub(int a[2]) { a[2]++; }
|
||||
|
||||
void kernel_deriche() {
|
||||
int a[2];
|
||||
sub0(a);
|
||||
int a[2];
|
||||
sub0(a);
|
||||
}
|
||||
|
||||
// FULLRANK: @sub(%arg0: memref<2xi32>)
|
||||
// CHECK: @sub(%arg0: memref<?xi32>)
|
||||
// CHECK-NEXT: %c1_i32 = constant 1 : i32
|
||||
// CHECK-NEXT: %0 = affine.load %arg0[2] : memref<?xi32>
|
||||
|
@ -24,4 +24,3 @@ void kernel_deriche() {
|
|||
// CHECK-NEXT: call @sub0(%1) : (memref<?xi32>) -> ()
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s %stdinclude -S | FileCheck %s
|
||||
// RUN: mlir-clang %s %stdinclude -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
@ -37,3 +38,6 @@ int main()
|
|||
// CHECK-NEXT: call @init_array(%1) : (memref<?xi32>) -> ()
|
||||
// CHECK-NEXT: return %c0_i32 : i32
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: %[[MEM:.*]] = memref.alloc() : memref<2800xi32>
|
||||
// FULLRANK: call @init_array(%[[MEM]]) : (memref<2800xi32>) -> ()
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s %stdinclude -S | FileCheck %s
|
||||
// RUN: mlir-clang %s %stdinclude -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
@ -38,3 +39,6 @@ int main()
|
|||
// CHECK-NEXT: memref.dealloc %0 : memref<2800xi32>
|
||||
// CHECK-NEXT: return %c0_i32 : i32
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: %[[MEM:.*]] = memref.alloc() : memref<2800xi32>
|
||||
// FULLRANK: call @init_array(%[[MEM]]) : (memref<2800xi32>) -> ()
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_deriche -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_deriche -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
int kernel_deriche(int a[30][40]) {
|
||||
a[3][5]++;
|
||||
|
@ -13,3 +14,5 @@ int kernel_deriche(int a[30][40]) {
|
|||
// CHECK-NEXT: %2 = affine.load %arg0[1, 2] : memref<?x40xi32>
|
||||
// CHECK-NEXT: return %2 : i32
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_deriche(%arg0: memref<30x40xi32>) -> i32
|
|
@ -0,0 +1,33 @@
|
|||
// RUN: mlir-clang %s -S -memref-fullrank -O0 | FileCheck %s
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int f(int A[10][20]) {
|
||||
int i, j, sum = 0;
|
||||
#pragma scop
|
||||
for (i = 0; i < 10; i++)
|
||||
for (j = 0; j < 20; j++)
|
||||
sum += A[i][j];
|
||||
#pragma endscop
|
||||
return sum;
|
||||
}
|
||||
|
||||
int g(int A[10][20]) {
|
||||
int c = f(A);
|
||||
printf("%d\n", c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main() {
|
||||
int A[10][20];
|
||||
return g(A);
|
||||
}
|
||||
|
||||
// CHECK: func @main()
|
||||
// CHECK: %[[VAL0:.*]] = memref.alloca() : memref<10x20xi32>
|
||||
// CHECK: %{{.*}} = call @g(%[[VAL0]]) : (memref<10x20xi32>) -> i32
|
||||
|
||||
// CHECK: func @g(%arg0: memref<10x20xi32>) -> i32
|
||||
|
||||
// CHECK: func @f(%arg0: memref<10x20xi32>) -> i32
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_deriche -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_deriche -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
int kernel_deriche(int a[30]) {
|
||||
a[0]++;
|
||||
|
@ -13,3 +14,5 @@ int kernel_deriche(int a[30]) {
|
|||
// CHECK-NEXT: %2 = affine.load %arg0[1] : memref<?xi32>
|
||||
// CHECK-NEXT: return %2 : i32
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_deriche(%arg0: memref<30xi32>) -> i32
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s %stdinclude --function=init_array -S | FileCheck %s
|
||||
// RUN: mlir-clang %s %stdinclude --function=init_array -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
@ -63,3 +64,5 @@ void init_array (int path[10][10])
|
|||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @init_array(%{{.*}}: memref<10x10xi32>)
|
|
@ -1,4 +1,6 @@
|
|||
// RUN: mlir-clang %s --function=kernel_nussinov -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_nussinov -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define N 5500
|
||||
#define max_score(s1, s2) ((s1 >= s2) ? s1 : s2)
|
||||
|
||||
|
@ -15,6 +17,8 @@
|
|||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: @kernel_nussinov(%{{.*}}: i32, %{{.*}}: memref<5500xi32>)
|
||||
|
||||
void kernel_nussinov(int n, int table[N])
|
||||
{
|
||||
int j;
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s %stdinclude --function=init_array -S | FileCheck %s
|
||||
// RUN: mlir-clang %s %stdinclude --function=init_array -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
@ -26,3 +27,6 @@ void init_array (int n)
|
|||
// CHECK-NEXT: call @use(%1) : (memref<?xf64>) -> ()
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: %[[VAL0:.*]] = memref.alloc() : memref<20xf64>
|
||||
// FULLRANK: call @use(%[[VAL0]]) : (memref<20xf64>) -> ()
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_correlation -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_correlation -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define DATA_TYPE double
|
||||
|
||||
|
@ -38,3 +39,5 @@ void kernel_correlation(double out[28], double stddev[28], _Bool cmp)
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_correlation(%{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>, %{{.*}}: i1)
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define DATA_TYPE double
|
||||
|
||||
|
@ -21,3 +22,5 @@ void kernel_correlation(int m, double corr[28])
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_correlation(%{{.*}}: i32, %{{.*}}: memref<28xf64>)
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define DATA_TYPE double
|
||||
|
||||
|
@ -29,3 +30,5 @@ void kernel_correlation(int m, double corr[28][28])
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_correlation(%{{.*}}: i32, %{{.*}}: memref<28x28xf64>)
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define DATA_TYPE double
|
||||
|
||||
|
@ -29,3 +30,5 @@ void kernel_correlation(double A[28], double B[28])
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_correlation(%{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>)
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define DATA_TYPE double
|
||||
|
||||
|
@ -58,3 +59,5 @@ void kernel_correlation(int n, double alpha, double beta,
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_correlation(%{{.*}}: i32, %{{.*}}: f64, %{{.*}}: f64, %{{.*}}: memref<28x28xf64>, %{{.*}}: memref<28x28xf64>, %{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>, %{{.*}}: memref<28xf64>)
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s -detect-reduction --function=kernel_nussinov -S | FileCheck %s
|
||||
// RUN: mlir-clang %s -detect-reduction --function=kernel_nussinov -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define max_score(s1, s2) ((s1 >= s2) ? s1 : s2)
|
||||
|
||||
|
@ -30,3 +31,7 @@ void kernel_nussinov(double* out, int n) {
|
|||
// CHECK-NEXT: affine.store %4, %arg0[symbol(%[[i1]])] : memref<?xf64>
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_nussinov(%{{.*}}: memref<?xf64>, %{{.*}}: i32)
|
||||
// FULLRANK: %[[i0:.+]] = memref.alloca() : memref<20xf64>
|
||||
// FULLRANK: call @set(%[[i0]]) : (memref<20xf64>) -> ()
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=kernel_correlation --raise-scf-to-affine -S -memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#define DATA_TYPE double
|
||||
|
||||
|
@ -30,3 +31,5 @@ void kernel_correlation(int table[N][N]) {
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @kernel_correlation(%{{.*}}: memref<10x10xi32>)
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s %stdinclude --function=set -S | FileCheck %s
|
||||
// RUN: mlir-clang %s %stdinclude --function=set -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
@ -38,3 +39,5 @@ void set (int path[20])
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: func @set(%{{.*}}: memref<20xi32>)
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s --function=whiletofor -S | FileCheck %s
|
||||
// RUN: mlir-clang %s --function=whiletofor -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
|
||||
void use(int a[100][100]);
|
||||
|
||||
|
@ -51,3 +52,6 @@ void whiletofor() {
|
|||
// CHECK-NEXT: call @use(%2) : (memref<?x100xi32>) -> ()
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// FULLRANK: %[[VAL0:.*]] = memref.alloca() : memref<100x100xi32>
|
||||
// FULLRANK: call @use(%[[VAL0]]) : (memref<100x100xi32>) -> ()
|
|
@ -1,4 +1,5 @@
|
|||
// RUN: mlir-clang %s %stdinclude -S | FileCheck %s
|
||||
// RUN: mlir-clang %s %stdinclude -S --memref-fullrank | FileCheck %s --check-prefix=FULLRANK
|
||||
// RUN: clang %s -O3 %stdinclude %polyverify -o %s.exec1 && %s.exec1 &> %s.out1
|
||||
// RUN: mlir-clang %s %polyverify %stdinclude -O3 -o %s.execm && %s.execm &> %s.out2
|
||||
// RUN: rm -f %s.exec1 %s.execm
|
||||
|
@ -160,6 +161,8 @@ int main(int argc, char** argv)
|
|||
return 0;
|
||||
}
|
||||
|
||||
// FULLRANK: func @kernel_gemm(%{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: f64, %{{.*}}: f64, %{{.*}}: memref<1000x1100xf64>, %{{.*}}: memref<1000x1200xf64>, %{{.*}}: memref<1200x1100xf64>)
|
||||
|
||||
// CHECK: func @kernel_gemm(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: f64, %arg4: f64, %arg5: memref<?x1100xf64>, %arg6: memref<?x1200xf64>, %arg7: memref<?x1100xf64>)
|
||||
// CHECK-DAG: %[[i0:.+]] = index_cast %arg0 : i32 to index
|
||||
// CHECK-DAG: %[[i1:.+]] = index_cast %arg1 : i32 to index
|
||||
|
|
Loading…
Reference in New Issue