From 869949230475d243904d4d528b645d009aad3304 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 25 Jun 2018 15:13:26 +0000 Subject: [PATCH] [DA] Delinearise AddRecs if we can prove they don't wrap We can prove that some delinearized subscripts do not wrap around to become negative by the fact that they are from inbound geps of load/store locations. This helps improve the delinearisation in cases where we can't prove that they are non-negative from SCEV alone. Differential Revision: https://reviews.llvm.org/D48481 llvm-svn: 335481 --- .../llvm/Analysis/DependenceAnalysis.h | 5 +++ llvm/lib/Analysis/DependenceAnalysis.cpp | 23 ++++++++++-- .../Analysis/DependenceAnalysis/DADelin.ll | 36 ++++++++++++++++++- 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index b58e01fe021b..c8ec737a2cb9 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -563,6 +563,11 @@ template class ArrayRef; /// bounds. bool isKnownLessThan(const SCEV *S, const SCEV *Size) const; + /// isKnownNonNegative - Compare to see if S is known not to be negative + /// Uses the fact that S comes from Ptr, which may be an inbound GEP, + /// Proving there is no wrapping going on. + bool isKnownNonNegative(const SCEV *S, const Value *Ptr) const; + /// collectUpperBound - All subscripts are the same type (on my machine, /// an i64). The loop bound may be a smaller type. collectUpperBound /// find the bound, if available, and zero extends it to the Type T. diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 54078b905237..79c2728d5620 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1027,6 +1027,25 @@ bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { return SE->isKnownNegative(LimitedBound); } +bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { + bool Inbounds = false; + if (auto *SrcGEP = dyn_cast(Ptr)) + Inbounds = SrcGEP->isInBounds(); + if (Inbounds) { + if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) { + if (AddRec->isAffine()) { + // We know S is for Ptr, the operand on a load/store, so doesn't wrap. + // If both parts are NonNegative, the end result will be NonNegative + if (SE->isKnownNonNegative(AddRec->getStart()) && + SE->isKnownNonNegative(AddRec->getOperand(1))) + return true; + } + } + } + + return SE->isKnownNonNegative(S); +} + // All subscripts are all the same type. // Loop bound may be smaller (e.g., a char). // Should zero extend loop bound, since it's always >= 0. @@ -3292,13 +3311,13 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // FIXME: It may be better to record these sizes and add them as constraints // to the dependency checks. for (int i = 1; i < size; ++i) { - if (!SE->isKnownNonNegative(SrcSubscripts[i])) + if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) return false; if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) return false; - if (!SE->isKnownNonNegative(DstSubscripts[i])) + if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) return false; if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll index 3572e708b83d..8438c2bf4259 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll @@ -553,4 +553,38 @@ for.inc10: ; preds = %for.body5, %for.bod for.end12: ; preds = %for.inc10, %entry ret double undef -} \ No newline at end of file +} + + +; CHECK-LABEL: nonnegative +define void @nonnegative(i32* nocapture %A, i32 %N) { +; CHECK: da analyze - none! +; CHECK: da analyze - consistent output [0 0|<]! +; CHECK: da analyze - none! +entry: + %cmp44 = icmp eq i32 %N, 0 + br i1 %cmp44, label %exit, label %for.outer + +for.outer: + %h.045 = phi i32 [ %add19, %for.latch ], [ 0, %entry ] + %mul = mul i32 %h.045, %N + br label %for.inner + +for.inner: + %i.043 = phi i32 [ 0, %for.outer ], [ %add16, %for.inner ] + %add = add i32 %i.043, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add + store i32 1, i32* %arrayidx, align 4 + store i32 2, i32* %arrayidx, align 4 + %add16 = add nuw i32 %i.043, 1 + %exitcond46 = icmp eq i32 %add16, %N + br i1 %exitcond46, label %for.latch, label %for.inner + +for.latch: + %add19 = add nuw i32 %h.045, 1 + %exitcond47 = icmp eq i32 %add19, %N + br i1 %exitcond47, label %exit, label %for.outer + +exit: + ret void +}