[LoopAccessAnalysis] Teach LAA to check the memory dependence between strided accesses.

Differential Revision: http://reviews.llvm.org/D9368

llvm-svn: 239285
Hao Liu 2015-06-08 04:48:37 +00:00
parent 0718c1a4d7
commit 751004a67d
2 changed files with 641 additions and 12 deletions


@@ -678,6 +678,42 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
  return false;
}
/// \brief Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is the type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
                                          unsigned TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in bytes must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not a multiple of the type byte size.
  if (Distance % TypeByteSize)
    return false;

  unsigned ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}
MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
@@ -778,34 +814,87 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
  unsigned Distance = (unsigned) Val.getZExtValue();
  unsigned Stride = std::abs(StrideAPtr);
  if (Stride > 1 &&
      areStridedAccessesIndependent(Distance, Stride, TypeByteSize))
    return Dependence::NoDep;

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (no need to add the last gap distance).
  //
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      foo(int *A) {
  //        int *B = (int *)((char *)A + 14);
  //        for (i = 0 ; i < 1024 ; i += 2)
  //          B[i] = A[i] + 1;
  //      }
  //
  // Two accesses in memory (stride is 2):
  //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
  //                              | B[0] |      | B[2] |      | B[4] |
  //
  // The distance needed for vectorizing all iterations except the last one is
  // 4 * 2 * (MinNumIter - 1); the last iteration needs another 4 bytes. So the
  // minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, the loop is vectorizable, as the minimum distance
  // needed (12) is less than the actual distance (14).
  //
  // If MinNumIter is 4 (say, a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than the distance, and
  // it is not safe to vectorize.
  unsigned MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  if (MinDistanceNeeded > Distance) {
    DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
                 << '\n');
    return Dependence::Backward;
  }

  // Unsafe if the minimum distance needed is greater than max safe distance.
  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
    DEBUG(dbgs() << "LAA: Failure because it needs at least "
                 << MinDistanceNeeded << " size in bytes");
    return Dependence::Backward;
  }
  // Positive distance bigger than max vectorization factor.
  // FIXME: Should use max factor instead of max distance in bytes, which could
  // not handle different types.
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      void foo (int *A, char *B) {
  //        for (unsigned i = 0; i < 1024; i++) {
  //          A[i+2] = A[i] + 1;
  //          B[i+2] = B[i] + 1;
  //        }
  //      }
  //
  // This case is currently unsafe according to the max safe distance. If we
  // analyze the two accesses on array B, the max safe dependence distance
  // is 2. Then when we analyze the accesses on array A, the minimum distance
  // needed is 8, which is greater than 2, so vectorization is forbidden. But
  // actually both A and B could be vectorized with a factor of 2.
  MaxSafeDepDistBytes =
      Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence &&
      couldPreventStoreLoadForward(Distance, TypeByteSize))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
               << " with max VF = "
               << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');

  return Dependence::BackwardVectorizable;
}
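
The independence check added above can be exercised on its own. The following
minimal standalone sketch (not part of this commit; the name areIndependent is
illustrative) mirrors areStridedAccessesIndependent and evaluates the two
example loops from its comment, plus one dependent case:

#include <cassert>
#include <iostream>

// Accesses with equal stride are independent when the element distance never
// lines up with the stride.
static bool areIndependent(unsigned Distance, unsigned Stride,
                           unsigned TypeByteSize) {
  assert(Stride > 1 && TypeByteSize > 0 && Distance > 0);
  // A dependence needs the byte distance to be a whole number of elements;
  // otherwise conservatively report "not independent", as the patch does.
  if (Distance % TypeByteSize)
    return false;
  unsigned ScaledDist = Distance / TypeByteSize;
  // Independent iff the element distance is not a multiple of the stride,
  // i.e. the two access streams never touch the same slot.
  return ScaledDist % Stride != 0;
}

int main() {
  // for (i = 0; i < 1024; i += 4) A[i+2] = A[i] + 1;  (int, 4 bytes)
  std::cout << areIndependent(8, 4, 4) << '\n';  // 1: scaled dist 2, stride 4
  // for (i = 0; i < 1024; i += 3) A[i+4] = A[i] + 1;
  std::cout << areIndependent(16, 3, 4) << '\n'; // 1: scaled dist 4, stride 3
  // for (i = 0; i < 1024; i += 2) A[i+4] = A[i] + 1;
  std::cout << areIndependent(16, 2, 4) << '\n'; // 0: scaled dist 4, stride 2
}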
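
Likewise, the minimum-distance computation can be checked by hand. This
standalone sketch (again not part of the commit) reproduces the
MinDistanceNeeded formula for the foo(int *A) example in the comment, where
TypeByteSize = 4 (int), Stride = 2 (i += 2), and the byte distance is 14:

#include <iostream>

int main() {
  const unsigned TypeByteSize = 4, Stride = 2, Distance = 14;
  for (unsigned MinNumIter : {2u, 4u}) {
    // One vector's worth of leading iterations plus the final element.
    unsigned MinDistanceNeeded =
        TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
    std::cout << "MinNumIter = " << MinNumIter << ": needs "
              << MinDistanceNeeded
              << (MinDistanceNeeded > Distance ? " -> Backward (unsafe)\n"
                                               : " -> vectorizable\n");
  }
  // Prints:
  //   MinNumIter = 2: needs 12 -> vectorizable
  //   MinNumIter = 4: needs 28 -> Backward (unsafe)
}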


@@ -0,0 +1,540 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; The following cases have no dependence.
; void nodep_Read_Write(int *A) {
; int *B = A + 1;
; for (unsigned i = 0; i < 1024; i+=3)
; B[i] = A[i] + 1;
; }
; CHECK: function 'nodep_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Run-time memory checks:
define void @nodep_Read_Write(i32* nocapture %A) {
entry:
%add.ptr = getelementptr inbounds i32, i32* %A, i64 1
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, 1
%arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; int nodep_Write_Read(int *A) {
; int sum = 0;
; for (unsigned i = 0; i < 1024; i+=4) {
; A[i] = i;
; sum += A[i+3];
; }
;
; return sum;
; }
; CHECK: function 'nodep_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Run-time memory checks:
define i32 @nodep_Write_Read(i32* nocapture %A) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret i32 %add3
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%1 = or i64 %indvars.iv, 3
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %1
%2 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %2, %sum.013
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; void nodep_Write_Write(int *A) {
; for (unsigned i = 0; i < 1024; i+=2) {
; A[i] = i;
; A[i+1] = i+1;
; }
; }
; CHECK: function 'nodep_Write_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Run-time memory checks:
define void @nodep_Write_Write(i32* nocapture %A) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%1 = or i64 %indvars.iv, 1
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %1
%2 = trunc i64 %1 to i32
store i32 %2, i32* %arrayidx3, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; The following cases are unsafe dependences and are not vectorizable.
; void unsafe_Read_Write(int *A) {
; for (unsigned i = 0; i < 1024; i+=3)
; A[i+3] = A[i] + 1;
; }
; CHECK: function 'unsafe_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx3, align 4
define void @unsafe_Read_Write(i32* nocapture %A) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%i.010 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
%idxprom = zext i32 %i.010 to i64
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, 1
%add1 = add i32 %i.010, 3
%idxprom2 = zext i32 %add1 to i64
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2
store i32 %add, i32* %arrayidx3, align 4
%cmp = icmp ult i32 %add1, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; int unsafe_Write_Read(int *A) {
; int sum = 0;
; for (unsigned i = 0; i < 1024; i+=4) {
; A[i] = i;
; sum += A[i+4];
; }
;
; return sum;
; }
; CHECK: function 'unsafe_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
define i32 @unsafe_Write_Read(i32* nocapture %A) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret i32 %add3
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.013 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
%1 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %1, %sum.013
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; void unsafe_Write_Write(int *A) {
; for (unsigned i = 0; i < 1024; i+=2) {
; A[i] = i;
; A[i+2] = i+1;
; }
; }
; CHECK: function 'unsafe_Write_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %2, i32* %arrayidx3, align 4
define void @unsafe_Write_Write(i32* nocapture %A) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%1 = or i64 %indvars.iv, 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
%2 = trunc i64 %1 to i32
store i32 %2, i32* %arrayidx3, align 4
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; The following cases check that strided accesses can be vectorized.
; void vectorizable_Read_Write(int *A) {
; int *B = A + 4;
; for (unsigned i = 0; i < 1024; i+=2)
; B[i] = A[i] + 1;
; }
; CHECK: function 'vectorizable_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
define void @vectorizable_Read_Write(i32* nocapture %A) {
entry:
%add.ptr = getelementptr inbounds i32, i32* %A, i64 4
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, 1
%arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; int vectorizable_Write_Read(int *A) {
; int *B = A + 4;
; int sum = 0;
; for (unsigned i = 0; i < 1024; i+=2) {
; A[i] = i;
; sum += B[i];
; }
;
; return sum;
; }
; CHECK: function 'vectorizable_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
define i32 @vectorizable_Write_Read(i32* nocapture %A) {
entry:
%add.ptr = getelementptr inbounds i32, i32* %A, i64 4
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret i32 %add
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
%1 = load i32, i32* %arrayidx2, align 4
%add = add nsw i32 %1, %sum.013
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; void vectorizable_Write_Write(int *A) {
; int *B = A + 4;
; for (unsigned i = 0; i < 1024; i+=2) {
; A[i] = i;
; B[i] = i+1;
; }
; }
; CHECK: function 'vectorizable_Write_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %2, i32* %arrayidx2, align 4
define void @vectorizable_Write_Write(i32* nocapture %A) {
entry:
%add.ptr = getelementptr inbounds i32, i32* %A, i64 4
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%1 = or i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds i32, i32* %add.ptr, i64 %indvars.iv
%2 = trunc i64 %1 to i32
store i32 %2, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; void vectorizable_unscaled_Read_Write(int *A) {
; int *B = (int *)((char *)A + 14);
; for (unsigned i = 0; i < 1024; i+=2)
; B[i] = A[i] + 1;
; }
; FIXME: This case looks like the previous case @vectorizable_Read_Write. It
; should be vectorizable.
; CHECK: function 'vectorizable_unscaled_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
define void @vectorizable_unscaled_Read_Write(i32* nocapture %A) {
entry:
%0 = bitcast i32* %A to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 14
%1 = bitcast i8* %add.ptr to i32*
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%2 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %2, 1
%arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; int vectorizable_unscaled_Write_Read(int *A) {
; int *B = (int *)((char *)A + 17);
; int sum = 0;
; for (unsigned i = 0; i < 1024; i+=2) {
; A[i] = i;
; sum += B[i];
; }
;
; return sum;
; }
; CHECK: function 'vectorizable_unscaled_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: store i32 %2, i32* %arrayidx, align 4 ->
; CHECK-NEXT: %3 = load i32, i32* %arrayidx2, align 4
define i32 @vectorizable_unscaled_Write_Read(i32* nocapture %A) {
entry:
%0 = bitcast i32* %A to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 17
%1 = bitcast i8* %add.ptr to i32*
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret i32 %add
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%2 = trunc i64 %indvars.iv to i32
store i32 %2, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
%3 = load i32, i32* %arrayidx2, align 4
%add = add nsw i32 %3, %sum.013
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; void unsafe_unscaled_Read_Write(int *A) {
; int *B = (int *)((char *)A + 11);
; for (unsigned i = 0; i < 1024; i+=2)
; B[i] = A[i] + 1;
; }
; CHECK: function 'unsafe_unscaled_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
define void @unsafe_unscaled_Read_Write(i32* nocapture %A) {
entry:
%0 = bitcast i32* %A to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 11
%1 = bitcast i8* %add.ptr to i32*
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%2 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %2, 1
%arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; CHECK: function 'unsafe_unscaled_Read_Write2':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
; void unsafe_unscaled_Read_Write2(int *A) {
; int *B = (int *)((char *)A + 1);
; for (unsigned i = 0; i < 1024; i+=2)
; B[i] = A[i] + 1;
; }
define void @unsafe_unscaled_Read_Write2(i32* nocapture %A) {
entry:
%0 = bitcast i32* %A to i8*
%add.ptr = getelementptr inbounds i8, i8* %0, i64 1
%1 = bitcast i8* %add.ptr to i32*
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%2 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %2, 1
%arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
; The following case checks that interleaved stores have dependences with
; another store and cannot pass the dependence check.
; void interleaved_stores(int *A) {
; int *B = (int *) ((char *)A + 1);
; for(int i = 0; i < 1024; i+=2) {
; B[i] = i; // (1)
; A[i+1] = i + 1; // (2)
; B[i+1] = i + 1; // (3)
; }
; }
;
; Access (2) overlaps with (1) and (3).
; CHECK: function 'interleaved_stores':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NEXT: Interesting Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4 ->
; CHECK-NEXT: store i32 %4, i32* %arrayidx9, align 4
; CHECK: Backward:
; CHECK-NEXT: store i32 %2, i32* %arrayidx2, align 4 ->
; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4
define void @interleaved_stores(i32* nocapture %A) {
entry:
%0 = bitcast i32* %A to i8*
%incdec.ptr = getelementptr inbounds i8, i8* %0, i64 1
%1 = bitcast i8* %incdec.ptr to i32*
br label %for.body
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%2 = trunc i64 %indvars.iv to i32
%arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
store i32 %2, i32* %arrayidx2, align 4
%3 = or i64 %indvars.iv, 1
%arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %3
%4 = trunc i64 %3 to i32
store i32 %4, i32* %arrayidx5, align 4
%arrayidx9 = getelementptr inbounds i32, i32* %1, i64 %3
store i32 %4, i32* %arrayidx9, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp slt i64 %indvars.iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
}