hanchenye-llvm-project/polly/test/ScopInfo/assume_gep_bounds.ll

; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s

;    void foo(float A[][20][30], long n, long m, long p) {
;      for (long i = 0; i < n; i++)
;        for (long j = 0; j < m; j++)
;          for (long k = 0; k < p; k++)
;            A[i][j][k] = i + j + k;
;    }

; For the above code we want to assume that all memory accesses are within the
; bounds of the array A. In C (and LLVM-IR) this is not required, so
; out-of-bounds accesses are valid. However, such accesses are uncommon, cause
; complicated dependence patterns and, as a result, make dependence analysis
; more costly and may prevent or hinder useful program transformations. We
; therefore assume the absence of out-of-bounds accesses. To do so, we derive
; the set of parameter values for which our assumption holds.

; CHECK: Assumed Context
; CHECK-NEXT: [n, m, p] -> {  :
; CHECK-DAG:                    p <= 30
; CHECK-DAG:                     and
; CHECK-DAG:                    m <= 20
; CHECK:                   }

; Module data layout: little-endian ("e"), ELF name mangling ("m:e"), i64
; aligned to 64 bits, 80-bit floating point aligned to 128 bits, native
; integer widths of 8/16/32/64 bits, and 128-bit stack alignment ("S128").
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; IR form of the C function `foo` quoted at the top of this file.
;
; Parameters:
;   %A        - pointer to [20 x [30 x float]], i.e. an array whose two inner
;               dimensions are fixed at 20 and 30 and whose outer dimension
;               is unbounded.
;   %n %m %p  - i64 trip-count bounds of the i, j and k loops respectively.
;
; The function is a three-deep loop nest; each level has the shape
; cond (header) -> body -> inc (latch) -> cond, with a dedicated end block.
; Loop bounds are parametric while the array's inner dimensions are constant;
; the expected "Assumed Context" at the top of the file bounds m and p by
; those same constants (20 and 30).
define void @foo([20 x [30 x float]]* %A, i64 %n, i64 %m, i64 %p) {
entry:
  br label %for.cond

; Outer (i) loop header: i starts at 0 and is incremented in %for.inc13.
; Continues while i < n (signed comparison); otherwise the function returns.
for.cond:                                         ; preds = %for.inc13, %entry
  %i.0 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ]
  %cmp = icmp slt i64 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end15

; Outer loop body: only enters the middle (j) loop.
for.body:                                         ; preds = %for.cond
  br label %for.cond1

; Middle (j) loop header: j restarts at 0 for every i and is incremented in
; %for.inc10. Continues while j < m (signed comparison).
for.cond1:                                        ; preds = %for.inc10, %for.body
  %j.0 = phi i64 [ 0, %for.body ], [ %inc11, %for.inc10 ]
  %cmp2 = icmp slt i64 %j.0, %m
  br i1 %cmp2, label %for.body3, label %for.end12

; Middle loop body: only enters the inner (k) loop.
for.body3:                                        ; preds = %for.cond1
  br label %for.cond4

; Inner (k) loop header: k restarts at 0 for every (i, j) and is incremented
; in %for.inc. Continues while k < p (signed comparison).
for.cond4:                                        ; preds = %for.inc, %for.body3
  %k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ]
  %cmp5 = icmp slt i64 %k.0, %p
  br i1 %cmp5, label %for.body6, label %for.end

; Innermost statement: A[i][j][k] = (float)(i + j + k).
; The GEP indexes the fixed-size dimensions with %j.0 (extent 20) and %k.0
; (extent 30); these subscripts are what the test's bounds assumption is about.
for.body6:                                        ; preds = %for.cond4
  %add = add nsw i64 %i.0, %j.0
  %add7 = add nsw i64 %add, %k.0
  %conv = sitofp i64 %add7 to float
  %arrayidx9 = getelementptr inbounds [20 x [30 x float]], [20 x [30 x float]]* %A, i64 %i.0, i64 %j.0, i64 %k.0
  store float %conv, float* %arrayidx9, align 4
  br label %for.inc

; Inner loop latch: k++.
for.inc:                                          ; preds = %for.body6
  %inc = add nsw i64 %k.0, 1
  br label %for.cond4

; Inner loop exit: falls through to the middle loop latch.
for.end:                                          ; preds = %for.cond4
  br label %for.inc10

; Middle loop latch: j++.
for.inc10:                                        ; preds = %for.end
  %inc11 = add nsw i64 %j.0, 1
  br label %for.cond1

; Middle loop exit: falls through to the outer loop latch.
for.end12:                                        ; preds = %for.cond1
  br label %for.inc13

; Outer loop latch: i++.
for.inc13:                                        ; preds = %for.end12
  %inc14 = add nsw i64 %i.0, 1
  br label %for.cond

; Outer loop exit: nothing is returned.
for.end15:                                        ; preds = %for.cond
  ret void
}
ScopDetection: Only detect scops that have at least one read and one write Scops that only read seem generally uninteresting and scops that only write are most likely initializations where there is also little to optimize. To not waste compile time we bail early. Differential Revision: http://reviews.llvm.org/D7735 llvm-svn: 229820 2015-02-19 13:31:07 +08:00			`; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s \| FileCheck %s`
Assume GetElementPtr offsets to be inbounds In case a GEP instruction references into a fixed size array e.g., an access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that the subscripts always compute values that are within array bounds. We now derive the set of parameter values for which all accesses are within bounds and add the assumption that the scop is only ever executed with this set of parameter values. Example: void foo(float A[][20], long n, long m) { for (long i = 0; i < n; i++) for (long j = 0; j < m; j++) A[i][j] = ... This loop yields out-of-bound accesses if m is at least 20 and at the same time at least one iteration of the outer loop is executed. Hence, we assume: n <= 0 or m <= 20. Doing so simplifies the dependence analysis problem, allows us to perform more optimizations and generate better code. TODO: The location where the GEP instruction is executed is not necessarily the location where the memory is actually accessed. As a result scanning for GEP[s] is imprecise. Even though this is not a correctness problem, this imprecision may result in missed optimizations or non-optimal run-time checks. In polybench where this mismatch between parametric loop bounds and fixed size arrays is common, we see with this patch significant reductions in compile time (up to 50%) and execution time (up to 70%). We see two significant compile time regressions (fdtd-2d, jacobi-2d-imper), and one execution time regression (trmm). Both regressions arise due to additional optimizations that have been enabled by this patch. They can be addressed in subsequent commits. http://reviews.llvm.org/D6369 llvm-svn: 222754 2014-11-25 18:51:12 +08:00
			`; void foo(float A[][20][30], long n, long m, long p) {`
			`; for (long i = 0; i < n; i++)`
			`; for (long j = 0; j < m; j++)`
			`; for (long k = 0; k < p; k++)`
			`; A[i][j][k] = i + j + k;`
			`; }`

			`; For the above code we want to assume that all memory accesses are within the`
			`; bounds of the array A. In C (and LLVM-IR) this is not required, such that out`
			`; of bounds accesses are valid. However, as such accesses are uncommon, cause`
			`; complicated dependence pattern and as a result make dependence analysis more`
			`; costly and may prevent or hinder useful program transformations, we assume`
			`; absence of out-of-bound accesses. To do so we derive the set of parameter`
			`; values for which our assumption holds.`

			`; CHECK: Assumed Context`
Use all available range information for parameters In the following even full-range information will help to avoid runtime checks for wrapping integers, hence we enable it now. llvm-svn: 235823 2015-04-27 04:07:21 +08:00			`; CHECK-NEXT: [n, m, p] -> { :`
			`; CHECK-DAG: p <= 30`
			`; CHECK-DAG: and`
			`; CHECK-DAG: m <= 20`
			`; CHECK: }`
Assume GetElementPtr offsets to be inbounds In case a GEP instruction references into a fixed size array e.g., an access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that the subscripts always compute values that are within array bounds. We now derive the set of parameter values for which all accesses are within bounds and add the assumption that the scop is only ever executed with this set of parameter values. Example: void foo(float A[][20], long n, long m) { for (long i = 0; i < n; i++) for (long j = 0; j < m; j++) A[i][j] = ... This loop yields out-of-bound accesses if m is at least 20 and at the same time at least one iteration of the outer loop is executed. Hence, we assume: n <= 0 or m <= 20. Doing so simplifies the dependence analysis problem, allows us to perform more optimizations and generate better code. TODO: The location where the GEP instruction is executed is not necessarily the location where the memory is actually accessed. As a result scanning for GEP[s] is imprecise. Even though this is not a correctness problem, this imprecision may result in missed optimizations or non-optimal run-time checks. In polybench where this mismatch between parametric loop bounds and fixed size arrays is common, we see with this patch significant reductions in compile time (up to 50%) and execution time (up to 70%). We see two significant compile time regressions (fdtd-2d, jacobi-2d-imper), and one execution time regression (trmm). Both regressions arise due to additional optimizations that have been enabled by this patch. They can be addressed in subsequent commits. http://reviews.llvm.org/D6369 llvm-svn: 222754 2014-11-25 18:51:12 +08:00
			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`

			`define void @foo([20 x [30 x float]]* %A, i64 %n, i64 %m, i64 %p) {`
			`entry:`
			`br label %for.cond`

			`for.cond: ; preds = %for.inc13, %entry`
			`%i.0 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ]`
			`%cmp = icmp slt i64 %i.0, %n`
			`br i1 %cmp, label %for.body, label %for.end15`

			`for.body: ; preds = %for.cond`
			`br label %for.cond1`

			`for.cond1: ; preds = %for.inc10, %for.body`
			`%j.0 = phi i64 [ 0, %for.body ], [ %inc11, %for.inc10 ]`
			`%cmp2 = icmp slt i64 %j.0, %m`
			`br i1 %cmp2, label %for.body3, label %for.end12`

			`for.body3: ; preds = %for.cond1`
			`br label %for.cond4`

			`for.cond4: ; preds = %for.inc, %for.body3`
			`%k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ]`
			`%cmp5 = icmp slt i64 %k.0, %p`
			`br i1 %cmp5, label %for.body6, label %for.end`

			`for.body6: ; preds = %for.cond4`
			`%add = add nsw i64 %i.0, %j.0`
			`%add7 = add nsw i64 %add, %k.0`
			`%conv = sitofp i64 %add7 to float`
Update Polly tests to handle explicitly typed gep changes in LLVM llvm-svn: 230784 2015-02-28 03:20:19 +08:00			`%arrayidx9 = getelementptr inbounds [20 x [30 x float]], [20 x [30 x float]]* %A, i64 %i.0, i64 %j.0, i64 %k.0`
Assume GetElementPtr offsets to be inbounds In case a GEP instruction references into a fixed size array e.g., an access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that the subscripts always compute values that are within array bounds. We now derive the set of parameter values for which all accesses are within bounds and add the assumption that the scop is only ever executed with this set of parameter values. Example: void foo(float A[][20], long n, long m) { for (long i = 0; i < n; i++) for (long j = 0; j < m; j++) A[i][j] = ... This loop yields out-of-bound accesses if m is at least 20 and at the same time at least one iteration of the outer loop is executed. Hence, we assume: n <= 0 or m <= 20. Doing so simplifies the dependence analysis problem, allows us to perform more optimizations and generate better code. TODO: The location where the GEP instruction is executed is not necessarily the location where the memory is actually accessed. As a result scanning for GEP[s] is imprecise. Even though this is not a correctness problem, this imprecision may result in missed optimizations or non-optimal run-time checks. In polybench where this mismatch between parametric loop bounds and fixed size arrays is common, we see with this patch significant reductions in compile time (up to 50%) and execution time (up to 70%). We see two significant compile time regressions (fdtd-2d, jacobi-2d-imper), and one execution time regression (trmm). Both regressions arise due to additional optimizations that have been enabled by this patch. They can be addressed in subsequent commits. http://reviews.llvm.org/D6369 llvm-svn: 222754 2014-11-25 18:51:12 +08:00			`store float %conv, float* %arrayidx9, align 4`
			`br label %for.inc`

			`for.inc: ; preds = %for.body6`
			`%inc = add nsw i64 %k.0, 1`
			`br label %for.cond4`

			`for.end: ; preds = %for.cond4`
			`br label %for.inc10`

			`for.inc10: ; preds = %for.end`
			`%inc11 = add nsw i64 %j.0, 1`
			`br label %for.cond1`

			`for.end12: ; preds = %for.cond1`
			`br label %for.inc13`

			`for.inc13: ; preds = %for.end12`
			`%inc14 = add nsw i64 %i.0, 1`
			`br label %for.cond`

			`for.end15: ; preds = %for.cond`
			`ret void`
			`}`