hanchenye-llvm-project/polly/test/ScopDetect/base_pointer.ll

; RUN: opt %loadPolly -disable-basicaa -polly-detect -polly-invariant-load-hoisting=true -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
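
; The base pointer %A is loaded from %A_ptr inside the region (in %pre). With
; -polly-invariant-load-hoisting the load is optimistically assumed invariant,
; so the region pre => return can still be reported as a valid SCoP.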
define void @base_pointer_in_condition(i64** noalias %A_ptr, i64 %N) nounwind {
entry:
fence seq_cst
br label %pre
pre:
%A = load i64*, i64** %A_ptr
br i1 true, label %for.i, label %then
for.i:
%indvar = phi i64 [ 0, %pre ], [ %indvar.next, %for.i ]
%scevgep = getelementptr i64, i64* %A, i64 %indvar
store i64 %indvar, i64* %scevgep
%indvar.next = add nsw i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %N
br i1 %exitcond, label %then, label %for.i
then:
br label %return
return:
fence seq_cst
ret void
}
; CHECK-LABEL: base_pointer_in_condition
; CHECK: Valid Region for Scop: pre => return
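
; The base pointer %A is a function argument and therefore available in the
; whole SCoP; the region for.i => exit is a valid SCoP.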
define void @base_pointer_is_argument(float* %A, i64 %n) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* %A, i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_argument
; CHECK: Valid Region for Scop: for.i => exit
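
; The base pointer is a constant expression (inttoptr (i64 100 to float*)),
; which is available everywhere; the region for.i => exit is a valid SCoP.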
define void @base_pointer_is_const_expr(i64 %n) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* inttoptr (i64 100 to float*), i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_const_expr
; CHECK: Valid Region for Scop: for.i => exit
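
; The base pointer is the global variable @A, which is available in the whole
; SCoP; the region for.i => exit is a valid SCoP.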
@A = external global float
define void @base_pointer_is_global(i64 %n) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* @A, i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_global
; CHECK: Valid Region for Scop: for.i => exit
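
; The base pointer %A is defined by a call instruction outside the region (in
; the function entry block), so it is available in the whole SCoP and the
; region for.i => exit is a valid SCoP.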
declare float *@foo()
define void @base_pointer_is_inst_outside(i64 %n) {
entry:
%A = call float *@foo()
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* %A, i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_inst_outside
; CHECK: Valid Region for Scop: for.i => exit
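
; The base pointer %ptr is a PHI node inside the region, so no valid SCoP is
; detected.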
declare float* @getNextBasePtr(float*) readnone nounwind
define void @base_pointer_is_phi_node(i64 %n, float* %A) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
%ptr = phi float* [ %ptr.next, %for.i.inc ], [ %A, %entry ]
; To get a PHI node inside a SCoP that cannot be analyzed, but for which the
; surrounding SCoP is normally still valid, we use a function without any
; side effects.
%ptr.next = call float* @getNextBasePtr(float* %ptr)
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* %ptr, i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_phi_node
; CHECK-NOT: Valid Region for Scop
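
; The base pointer %ptr is defined by a call inside the region; see the comment
; below for why this is rejected even though the call has no side effects.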
define void @base_pointer_is_inst_inside_invariant_1(i64 %n, float* %A) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
; The return value of a function call, even one with readnone nounwind
; attributes, is not considered a valid base pointer because the call may
; return a pointer that aliases with something else (e.g., %A or a global) or
; return a different pointer at every call (e.g., malloc).
%ptr = call float* @getNextBasePtr(float* %A)
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* %ptr, i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_inst_inside_invariant_1
; CHECK-NOT: Valid Region for Scop
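
; Same as above, but the base pointer is obtained through a chain of two
; readnone nounwind calls inside the region; it is still rejected.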
declare float* @getNextBasePtr2(float*) readnone nounwind
define void @base_pointer_is_inst_inside_invariant_2(i64 %n, float* %A) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
%ptr = call float* @getNextBasePtr2(float* %A)
%ptr2 = call float* @getNextBasePtr(float* %ptr)
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* %ptr2, i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_inst_inside_invariant_2
; CHECK-NOT: Valid Region for Scop
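
; Here the call defining the base pointer additionally takes the induction
; variable as an argument, so the base pointer may even vary per iteration;
; no valid SCoP is detected.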
declare float* @getNextBasePtr3(float*, i64) readnone nounwind
define void @base_pointer_is_inst_inside_variant(i64 %n, float* %A) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
%ptr = call float* @getNextBasePtr3(float* %A, i64 %indvar.i)
%ptr2 = call float* @getNextBasePtr(float* %ptr)
br label %S1
S1:
%conv = sitofp i64 %indvar.i to float
%arrayidx5 = getelementptr float, float* %ptr2, i64 %indvar.i
store float %conv, float* %arrayidx5, align 4
br label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_inst_inside_variant
; CHECK-NOT: Valid Region for Scop
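
; The base pointer of the inner accesses is itself loaded from the pointer
; array %A inside the outer loop. With invariant load hoisting the inner
; region for.j => for.i.inc is reported as a valid SCoP.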
define void @base_pointer_is_ptr2ptr(float** noalias %A, i64 %n) {
entry:
br label %for.i
for.i:
%indvar.i = phi i64 [ %indvar.i.next, %for.i.inc ], [ 0, %entry ]
%arrayidx = getelementptr float*, float** %A, i64 %indvar.i
br label %for.j
for.j:
%indvar.j = phi i64 [ 0, %for.i ], [ %indvar.j.next, %for.j ]
%conv = sitofp i64 %indvar.i to float
%basepointer = load float*, float** %arrayidx, align 8
%arrayidx5 = getelementptr float, float* %basepointer, i64 %indvar.j
store float %conv, float* %arrayidx5, align 4
%indvar.j.next = add i64 %indvar.j, 1
%exitcond.j = icmp ne i64 %indvar.j.next, %n
br i1 %exitcond.j, label %for.j, label %for.i.inc
for.i.inc:
%indvar.i.next = add i64 %indvar.i, 1
%exitcond.i = icmp ne i64 %indvar.i.next, %n
br i1 %exitcond.i, label %for.i, label %exit
exit:
ret void
}
; CHECK-LABEL: base_pointer_is_ptr2ptr
; CHECK: Valid Region for Scop: for.j => for.i.inc