Use modulo semantic to generate non-integer-overflow assumptions

This allows us to generate non-wrap assumptions for integer expressions
  that are part of the SCoP. We compare the common isl representation of
  the expression with one computed with modulo semantics. For all parameter
  combinations for which they are not equal, integer overflows can occur.

  The nsw flags are respected when the modulo representation is computed,
  nuw and nw flags are ignored for now.

  In order not to increase compile time too much, the non-wrap assumptions
  are collected in a separate boundary context instead of the assumed
  context. This helps compile time as the boundary context can become
  complex and it is therefore not advised to use it in other operations
  except runtime check generation. However, the assumed context is, e.g.,
  used to tighten dependences. While the boundary context might help to
  tighten the assumed context, it is doubtful that it will help in practice
  (it does not affect lnt much) as the boundary (or no-wrap) assumptions
  only restrict the very end of the possible value range of parameters.

  PET uses a different approach to compute the no-wrap context, though lnt runs
  have shown that this version performs slightly better for us.

llvm-svn: 247732
This commit is contained in:
Johannes Doerfert 2015-09-15 22:52:53 +00:00
parent cef616fe2d
commit 883f8c1d2f
35 changed files with 865 additions and 37 deletions

View File

@ -950,6 +950,17 @@ private:
/// this scop and that need to be code generated as a run-time test. /// this scop and that need to be code generated as a run-time test.
isl_set *AssumedContext; isl_set *AssumedContext;
/// @brief The boundary assumptions under which this scop was built.
///
/// The boundary context is similar to the assumed context as it contains
/// constraints over the parameters we assume to be true. However, the
/// boundary context is less useful for dependence analysis and
/// simplification purposes as it contains only constraints that affect the
/// boundaries of the parameter ranges. As these constraints can become quite
/// complex, the boundary context and the assumed context are separated as a
/// measure to save compile time.
isl_set *BoundaryContext;
/// @brief The schedule of the SCoP /// @brief The schedule of the SCoP
/// ///
/// The schedule of the SCoP describes the execution order of the statements /// The schedule of the SCoP describes the execution order of the statements
@ -1059,14 +1070,17 @@ private:
/// @brief Build the Context of the Scop. /// @brief Build the Context of the Scop.
void buildContext(); void buildContext();
/// @brief Build the BoundaryContext based on the wrapping of expressions.
void buildBoundaryContext();
/// @brief Add user provided parameter constraints to context. /// @brief Add user provided parameter constraints to context.
void addUserContext(); void addUserContext();
/// @brief Add the bounds of the parameters to the context. /// @brief Add the bounds of the parameters to the context.
void addParameterBounds(); void addParameterBounds();
/// @brief Simplify the assumed context. /// @brief Simplify the assumed and boundary context.
void simplifyAssumedContext(); void simplifyContexts();
/// @brief Create a new SCoP statement for either @p BB or @p R. /// @brief Create a new SCoP statement for either @p BB or @p R.
/// ///
@ -1240,6 +1254,11 @@ public:
/// to hold. /// to hold.
void addAssumption(__isl_take isl_set *Set); void addAssumption(__isl_take isl_set *Set);
/// @brief Get the boundary context for this Scop.
///
/// @return The boundary context of this Scop.
__isl_give isl_set *getBoundaryContext() const;
/// @brief Build the alias checks for this SCoP. /// @brief Build the alias checks for this SCoP.
void buildAliasChecks(AliasAnalysis &AA); void buildAliasChecks(AliasAnalysis &AA);
@ -1259,6 +1278,9 @@ public:
/// @brief Get an isl string representing the assumed context. /// @brief Get an isl string representing the assumed context.
std::string getAssumedContextStr() const; std::string getAssumedContextStr() const;
/// @brief Get an isl string representing the boundary context.
std::string getBoundaryContextStr() const;
/// @brief Return the stmt for the given @p BB or nullptr if none. /// @brief Return the stmt for the given @p BB or nullptr if none.
ScopStmt *getStmtForBasicBlock(BasicBlock *BB) const; ScopStmt *getStmtForBasicBlock(BasicBlock *BB) const;

View File

@ -35,6 +35,7 @@ struct isl_schedule;
namespace llvm { namespace llvm {
class Region; class Region;
class BasicBlock; class BasicBlock;
class DataLayout;
class ScalarEvolution; class ScalarEvolution;
} }
@ -57,6 +58,14 @@ public:
__isl_give isl_pw_aff *getPwAff(const llvm::SCEV *E, __isl_give isl_pw_aff *getPwAff(const llvm::SCEV *E,
llvm::BasicBlock *BB = nullptr); llvm::BasicBlock *BB = nullptr);
/// @brief Compute the context in which integer wrapping is happening.
///
/// This context contains all parameter configurations for which we
/// know that the wrapping and non-wrapping expressions are different.
///
/// @returns The context in which integer wrapping is happening.
__isl_give isl_set *getWrappingContext() const;
private: private:
/// @brief Key to identify cached expressions. /// @brief Key to identify cached expressions.
using CacheKey = std::pair<const llvm::SCEV *, llvm::BasicBlock *>; using CacheKey = std::pair<const llvm::SCEV *, llvm::BasicBlock *>;
@ -71,6 +80,27 @@ private:
llvm::ScalarEvolution &SE; llvm::ScalarEvolution &SE;
llvm::BasicBlock *BB; llvm::BasicBlock *BB;
/// @brief Target data for element size computing.
const llvm::DataLayout &TD;
/// @brief Compute the non-wrapping version of @p PWA for type @p ExprType.
///
/// @param PWA The piece-wise affine function that might wrap.
/// @param ExprType The type of the SCEV that was translated to @p PWA.
///
/// @returns The expr @p PWA modulo the size constraints of @p ExprType.
__isl_give isl_pw_aff *addModuloSemantic(__isl_take isl_pw_aff *PWA,
llvm::Type *ExprType) const;
/// @brief Compute the context in which integer wrapping for @p PWA happens.
///
/// @returns The context in which integer wrapping happens or nullptr if
/// empty.
__isl_give isl_set *getWrappingContext(llvm::SCEV::NoWrapFlags Flags,
llvm::Type *ExprType,
__isl_keep isl_pw_aff *PWA,
__isl_keep isl_set *ExprDomain) const;
__isl_give isl_pw_aff *visit(const llvm::SCEV *E); __isl_give isl_pw_aff *visit(const llvm::SCEV *E);
__isl_give isl_pw_aff *visitConstant(const llvm::SCEVConstant *E); __isl_give isl_pw_aff *visitConstant(const llvm::SCEVConstant *E);
__isl_give isl_pw_aff *visitTruncateExpr(const llvm::SCEVTruncateExpr *E); __isl_give isl_pw_aff *visitTruncateExpr(const llvm::SCEVTruncateExpr *E);

View File

@ -1248,6 +1248,12 @@ isl_set *Scop::addNonEmptyDomainConstraints(isl_set *C) const {
return isl_set_intersect_params(C, DomainContext); return isl_set_intersect_params(C, DomainContext);
} }
void Scop::buildBoundaryContext() {
BoundaryContext = Affinator.getWrappingContext();
BoundaryContext = isl_set_complement(BoundaryContext);
BoundaryContext = isl_set_gist_params(BoundaryContext, getContext());
}
void Scop::addUserContext() { void Scop::addUserContext() {
if (UserContextStr.empty()) if (UserContextStr.empty())
return; return;
@ -1328,7 +1334,16 @@ void Scop::realignParams() {
Stmt.realignParams(); Stmt.realignParams();
} }
void Scop::simplifyAssumedContext() { static __isl_give isl_set *
simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
const Scop &S) {
isl_set *DomainParameters = isl_union_set_params(S.getDomains());
AssumptionContext = isl_set_gist_params(AssumptionContext, DomainParameters);
AssumptionContext = isl_set_gist_params(AssumptionContext, S.getContext());
return AssumptionContext;
}
void Scop::simplifyContexts() {
// The parameter constraints of the iteration domains give us a set of // The parameter constraints of the iteration domains give us a set of
// constraints that need to hold for all cases where at least a single // constraints that need to hold for all cases where at least a single
// statement iteration is executed in the whole scop. We now simplify the // statement iteration is executed in the whole scop. We now simplify the
@ -1357,9 +1372,8 @@ void Scop::simplifyAssumedContext() {
// we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
// otherwise we would access out of bound data. Now, knowing that code is // otherwise we would access out of bound data. Now, knowing that code is
// only executed for the case m >= 0, it is sufficient to assume p >= 0. // only executed for the case m >= 0, it is sufficient to assume p >= 0.
AssumedContext = AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
isl_set_gist_params(AssumedContext, isl_union_set_params(getDomains())); BoundaryContext = simplifyAssumptionContext(BoundaryContext, *this);
AssumedContext = isl_set_gist_params(AssumedContext, getContext());
} }
/// @brief Add the minimal/maximal access in @p Set to @p User. /// @brief Add the minimal/maximal access in @p Set to @p User.
@ -2118,7 +2132,8 @@ Scop::Scop(Region &R, AccFuncMapType &AccFuncMap,
isl_ctx *Context, unsigned MaxLoopDepth) isl_ctx *Context, unsigned MaxLoopDepth)
: DT(DT), SE(&ScalarEvolution), R(R), AccFuncMap(AccFuncMap), : DT(DT), SE(&ScalarEvolution), R(R), AccFuncMap(AccFuncMap),
IsOptimized(false), HasSingleExitEdge(R.getExitingBlock()), IsOptimized(false), HasSingleExitEdge(R.getExitingBlock()),
MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this) {} MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this),
BoundaryContext(nullptr) {}
void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) { void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) {
buildContext(); buildContext();
@ -2135,13 +2150,15 @@ void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) {
realignParams(); realignParams();
addParameterBounds(); addParameterBounds();
addUserContext(); addUserContext();
simplifyAssumedContext(); buildBoundaryContext();
simplifyContexts();
buildAliasChecks(AA); buildAliasChecks(AA);
} }
Scop::~Scop() { Scop::~Scop() {
isl_set_free(Context); isl_set_free(Context);
isl_set_free(AssumedContext); isl_set_free(AssumedContext);
isl_set_free(BoundaryContext);
isl_schedule_free(Schedule); isl_schedule_free(Schedule);
for (auto It : DomainMap) for (auto It : DomainMap)
@ -2180,6 +2197,9 @@ std::string Scop::getContextStr() const { return stringFromIslObj(Context); }
std::string Scop::getAssumedContextStr() const { std::string Scop::getAssumedContextStr() const {
return stringFromIslObj(AssumedContext); return stringFromIslObj(AssumedContext);
} }
std::string Scop::getBoundaryContextStr() const {
return stringFromIslObj(BoundaryContext);
}
std::string Scop::getNameStr() const { std::string Scop::getNameStr() const {
std::string ExitName, EntryName; std::string ExitName, EntryName;
@ -2209,6 +2229,9 @@ __isl_give isl_set *Scop::getAssumedContext() const {
__isl_give isl_set *Scop::getRuntimeCheckContext() const { __isl_give isl_set *Scop::getRuntimeCheckContext() const {
isl_set *RuntimeCheckContext = getAssumedContext(); isl_set *RuntimeCheckContext = getAssumedContext();
RuntimeCheckContext =
isl_set_intersect(RuntimeCheckContext, getBoundaryContext());
RuntimeCheckContext = simplifyAssumptionContext(RuntimeCheckContext, *this);
return RuntimeCheckContext; return RuntimeCheckContext;
} }
@ -2225,6 +2248,10 @@ void Scop::addAssumption(__isl_take isl_set *Set) {
AssumedContext = isl_set_coalesce(AssumedContext); AssumedContext = isl_set_coalesce(AssumedContext);
} }
/// @brief Hand out a copy of the boundary context; the member stays owned by
///        this Scop.
__isl_give isl_set *Scop::getBoundaryContext() const {
  isl_set *BoundaryCopy = isl_set_copy(BoundaryContext);
  return BoundaryCopy;
}
void Scop::printContext(raw_ostream &OS) const { void Scop::printContext(raw_ostream &OS) const {
OS << "Context:\n"; OS << "Context:\n";
@ -2243,6 +2270,14 @@ void Scop::printContext(raw_ostream &OS) const {
OS.indent(4) << getAssumedContextStr() << "\n"; OS.indent(4) << getAssumedContextStr() << "\n";
OS.indent(4) << "Boundary Context:\n";
if (!BoundaryContext) {
OS.indent(4) << "n/a\n\n";
return;
}
OS.indent(4) << getBoundaryContextStr() << "\n";
for (const SCEV *Parameter : Parameters) { for (const SCEV *Parameter : Parameters) {
int Dim = ParameterIds.find(Parameter)->second; int Dim = ParameterIds.find(Parameter)->second;
OS.indent(4) << "p" << Dim << ": " << *Parameter << "\n"; OS.indent(4) << "p" << Dim << ": " << *Parameter << "\n";

View File

@ -27,7 +27,8 @@ using namespace llvm;
using namespace polly; using namespace polly;
SCEVAffinator::SCEVAffinator(Scop *S) SCEVAffinator::SCEVAffinator(Scop *S)
: S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()) {} : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()),
TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {}
SCEVAffinator::~SCEVAffinator() { SCEVAffinator::~SCEVAffinator() {
for (const auto &CachedPair : CachedExpressions) for (const auto &CachedPair : CachedExpressions)
@ -50,6 +51,94 @@ __isl_give isl_pw_aff *SCEVAffinator::getPwAff(const SCEV *Expr,
return visit(Expr); return visit(Expr);
} }
/// @brief Compute the parameter context in which @p PWA wraps.
///
/// @param Flags      The no-wrap flags of the SCEV that was translated to
///                   @p PWA.
/// @param ExprType   The type of that SCEV.
/// @param PWA        The translated expression; only copies of it are used.
/// @param ExprDomain The domain the result is intersected with. It is only
///                   copied here, so the caller keeps ownership (this matches
///                   the __isl_keep annotation on the declaration).
///
/// @returns The parameter combinations for which @p PWA and its modulo
///          representation differ, or nullptr if the expression is flagged
///          NSW or both representations are equal.
__isl_give isl_set *
SCEVAffinator::getWrappingContext(SCEV::NoWrapFlags Flags, Type *ExprType,
                                  __isl_keep isl_pw_aff *PWA,
                                  __isl_keep isl_set *ExprDomain) const {
  // If the SCEV flags do contain NSW (no signed wrap) then PWA already
  // represents Expr in modulo semantic (it is not allowed to overflow), thus we
  // are done. Otherwise, we will compute:
  //   PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1)
  // whereas n is the number of bits of the Expr, hence:
  //   n = bitwidth(ExprType)
  if (Flags & SCEV::FlagNSW)
    return nullptr;

  isl_pw_aff *PWAMod = addModuloSemantic(isl_pw_aff_copy(PWA), ExprType);
  if (isl_pw_aff_is_equal(PWA, PWAMod)) {
    isl_pw_aff_free(PWAMod);
    return nullptr;
  }

  // Work on a copy so the caller's PWA stays untouched.
  auto *NotEqualSet = isl_pw_aff_ne_set(isl_pw_aff_copy(PWA), PWAMod);

  // NOTE(review): getWrappingContext() passes a null ExprDomain when no basic
  // block is attached to the cached expression; confirm the isl calls below
  // behave as intended for a null domain.
  NotEqualSet = isl_set_intersect(NotEqualSet, isl_set_copy(ExprDomain));
  NotEqualSet = isl_set_gist_params(NotEqualSet, S->getContext());
  NotEqualSet = isl_set_params(NotEqualSet);
  return NotEqualSet;
}
/// @brief Collect the wrapping contexts of all cached expressions.
///
/// Only cached add, mul and add-rec expressions are considered; their
/// individual wrapping contexts are united into one parameter set.
__isl_give isl_set *SCEVAffinator::getWrappingContext() const {
  isl_set *Result = isl_set_empty(S->getParamSpace());

  for (const auto &Entry : CachedExpressions) {
    const SCEV *CachedExpr = Entry.first.first;

    // Only these three expression kinds carry the no-wrap flags we inspect;
    // everything else is skipped.
    SCEV::NoWrapFlags WrapFlags;
    if (const auto *Add = dyn_cast<SCEVAddExpr>(CachedExpr))
      WrapFlags = Add->getNoWrapFlags();
    else if (const auto *Mul = dyn_cast<SCEVMulExpr>(CachedExpr))
      WrapFlags = Mul->getNoWrapFlags();
    else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(CachedExpr))
      WrapFlags = AddRec->getNoWrapFlags();
    else
      continue;

    // The domain is only available if the expression was cached for a block.
    BasicBlock *ExprBB = Entry.first.second;
    isl_set *Domain = ExprBB ? S->getDomainConditions(ExprBB) : nullptr;

    isl_set *ExprWrapCtx = getWrappingContext(
        WrapFlags, CachedExpr->getType(), Entry.second, Domain);
    isl_set_free(Domain);

    // A null result means there is nothing to add for this expression.
    if (ExprWrapCtx)
      Result = isl_set_union(Result, ExprWrapCtx);
  }

  return Result;
}
/// @brief Rewrite @p PWA so that it wraps like a signed value of @p ExprType.
///
/// With n = bitwidth(ExprType) the result is
///   ((PWA + 2^(n-1)) mod 2^n) - 2^(n-1)
///
/// @param PWA      The piece-wise affine function to rewrite (consumed).
/// @param ExprType The type of the SCEV that was translated to @p PWA.
///
/// @returns The expression @p PWA modulo the size constraints of @p ExprType.
__isl_give isl_pw_aff *
SCEVAffinator::addModuloSemantic(__isl_take isl_pw_aff *PWA,
                                 Type *ExprType) const {
  // Use the real bit width of the type. The store size is rounded up to whole
  // bytes and would therefore be too large for integer types whose width is
  // not a multiple of eight.
  unsigned Width = TD.getTypeSizeInBits(ExprType);
  isl_ctx *IslCtx = isl_pw_aff_get_ctx(PWA);

  // ModVal = 2^n
  isl_val *ModVal = isl_val_int_from_ui(IslCtx, Width);
  ModVal = isl_val_2exp(ModVal);

  // AddVal = 2^(n-1)
  isl_val *AddVal = isl_val_int_from_ui(IslCtx, Width - 1);
  AddVal = isl_val_2exp(AddVal);

  // Build the constant 2^(n-1) on the domain of PWA.
  isl_set *Domain = isl_pw_aff_domain(isl_pw_aff_copy(PWA));
  isl_pw_aff *AddPW = isl_pw_aff_val_on_domain(Domain, AddVal);

  // Shift, take the modulo and shift back.
  PWA = isl_pw_aff_add(PWA, isl_pw_aff_copy(AddPW));
  PWA = isl_pw_aff_mod_val(PWA, ModVal);
  PWA = isl_pw_aff_sub(PWA, AddPW);

  return PWA;
}
__isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) { __isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) {
auto Key = std::make_pair(Expr, BB); auto Key = std::make_pair(Expr, BB);
@ -75,6 +164,11 @@ __isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) {
} }
PWA = SCEVVisitor<SCEVAffinator, isl_pw_aff *>::visit(Expr); PWA = SCEVVisitor<SCEVAffinator, isl_pw_aff *>::visit(Expr);
// For compile time reasons we need to simplify the PWA before we cache and
// return it.
PWA = isl_pw_aff_coalesce(PWA);
CachedExpressions[Key] = PWA; CachedExpressions[Key] = PWA;
return isl_pw_aff_copy(PWA); return isl_pw_aff_copy(PWA);
} }
@ -125,8 +219,6 @@ __isl_give isl_pw_aff *SCEVAffinator::visitAddExpr(const SCEVAddExpr *Expr) {
Sum = isl_pw_aff_add(Sum, NextSummand); Sum = isl_pw_aff_add(Sum, NextSummand);
} }
// TODO: Check for NSW and NUW.
return Sum; return Sum;
} }
@ -167,7 +259,6 @@ SCEVAffinator::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1); isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff); isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);
// TODO: Do we need to check for NSW and NUW?
return isl_pw_aff_mul(Step, LPwAff); return isl_pw_aff_mul(Step, LPwAff);
} }

View File

@ -272,7 +272,11 @@ exit.2:
; VALUE: RAW dependences: ; VALUE: RAW dependences:
; VALUE: [p] -> { ; VALUE: [p] -> {
; VALUE: Stmt_S1[i0] -> Stmt_S2[-p + i0] : ; VALUE: Stmt_S1[i0] -> Stmt_S2[-p + i0] :
; VALUE: i0 >= 0 and i0 <= 9 + p and i0 >= p and i0 <= 99 and p <= 190 ; VALUE-DAG: p <= 190
; VALUE-DAG: i0 >= p
; VALUE-DAG: i0 <= 9 + p
; VALUE-DAG: i0 <= 99
; VALUE-DAG: i0 >= 0
; VALUE: } ; VALUE: }
; VALUE: WAR dependences: ; VALUE: WAR dependences:
; VALUE: [p] -> { ; VALUE: [p] -> {

View File

@ -5,7 +5,7 @@
; A[i] = B[c - 10] + B[5]; ; A[i] = B[c - 10] + B[5];
; } ; }
; ;
; CHECK: if (1 && (&MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0] || &MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10])) ; CHECK: if (c >= -{{[0-9]*}} && (&MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0] || &MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10]))
; CHECK: for (int c0 = 0; c0 <= 1023; c0 += 1) ; CHECK: for (int c0 = 0; c0 <= 1023; c0 += 1)
; CHECK: Stmt_for_body(c0); ; CHECK: Stmt_for_body(c0);
; CHECK: else ; CHECK: else

View File

@ -18,7 +18,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; cause any code to be executed are not generated. ; cause any code to be executed are not generated.
; CHECK: if ( ; CHECK: if (
; CHECK: (o >= 1 && q <= 0 && m + q >= 0) ; CHECK: (o >= 1 && n + p <= 9223372036854775808 && q <= 0 && m + q >= 0)
; CHECK: || ; CHECK: ||
; CHECK; (o <= 0 && m + q >= 100 && q <= 100) ; CHECK; (o <= 0 && m + q >= 100 && q <= 100)
; CHECK: ) ; CHECK: )

View File

@ -5,6 +5,7 @@
; A[i] = B[c - 10] + B[5]; ; A[i] = B[c - 10] + B[5];
; } ; }
; ;
; CHECK: sext i32 %c to i64
; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 ; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64
; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15 ; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15
; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64 ; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64
@ -22,7 +23,7 @@
; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]]
; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]] ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]]
; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]]
; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 true, %[[NoAlias]] ; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %1, %[[NoAlias]]
; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond ; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond
; ;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -28,7 +28,7 @@ exit:
; Check that we transform this into a pointer difference. ; Check that we transform this into a pointer difference.
; CODEGEN: %0 = ptrtoint i8* %end to i64 ; CODEGEN: %[[r0:[._a-zA-Z0-9]]] = ptrtoint i8* %end to i64
; CODEGEN: %1 = ptrtoint i8* %start to i64 ; CODEGEN: %[[r1:[._a-zA-Z0-9]]] = ptrtoint i8* %start to i64
; CODEGEN: %2 = sub i64 %0, %1 ; CODEGEN: %[[r2:[._a-zA-Z0-9]]] = sub i64 %[[r0]], %[[r1]]

View File

@ -46,6 +46,7 @@ return:
; CHECK: Stmt_store(c0); ; CHECK: Stmt_store(c0);
; CHECK: } ; CHECK: }
; CODEGEN: %0 = bitcast float* %P to i8* ; CODEGEN: %[[R0:[0-9]*]] = bitcast float* %P to i8*
; CODEGEN: %1 = icmp ule i8* %0, inttoptr (i64 -1 to i8*) ; CODEGEN: %[[R1:[0-9]*]] = bitcast float* %P to i8*
; CODEGEN-NEXT: icmp ule i8* %[[R1]], inttoptr (i64 -1 to i8*)

View File

@ -3,7 +3,7 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -polly-ignore-aliasing -disable-output < %s ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -polly-ignore-aliasing -disable-output < %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; SCALAR: if (1) ; SCALAR: if (
; SCALAR: { ; SCALAR: {
; SCALAR: Stmt_for_1(0); ; SCALAR: Stmt_for_1(0);
; SCALAR: for (int c0 = 1; c0 <= -Scalar0.val + 99; c0 += 1) ; SCALAR: for (int c0 = 1; c0 <= -Scalar0.val + 99; c0 += 1)

View File

@ -18,7 +18,19 @@
; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Region: %bb15---%bb13
; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Max Loop Depth: 1
; INNERMOST: Context: ; INNERMOST: Context:
; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 } ; INNERMOST: [p_0, p_1, p_2] -> { :
; INNERMOST-DAG: p_0 >= 0
; INNERMOST-DAG: and
; INNERMOST-DAG: p_0 <= 2147483647
; INNERMOST-DAG: and
; INNERMOST-DAG: p_1 >= 0
; INNERMOST-DAG: and
; INNERMOST-DAG: p_1 <= 4096
; INNERMOST-DAG: and
; INNERMOST-DAG: p_2 >= 0
; INNERMOST-DAG: and
; INNERMOST-DAG: p_2 <= 4096
; INNERMOST: }
; INNERMOST: Assumed Context: ; INNERMOST: Assumed Context:
; INNERMOST: [p_0, p_1, p_2] -> { : } ; INNERMOST: [p_0, p_1, p_2] -> { : }
; INNERMOST: p0: {0,+,{0,+,1}<nuw><nsw><%bb11>}<nuw><nsw><%bb13> ; INNERMOST: p0: {0,+,{0,+,1}<nuw><nsw><%bb11>}<nuw><nsw><%bb13>

View File

@ -10,7 +10,11 @@
; INNERMOST: Region: %bb9---%bb17 ; INNERMOST: Region: %bb9---%bb17
; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Max Loop Depth: 1
; INNERMOST: Context: ; INNERMOST: Context:
; INNERMOST: [N] -> { : N >= -2147483648 and N <= 2147483647 } ; INNERMOST: [N] -> { :
; INNERMOST-DAG: N >= -2147483648
; INNERMOST-DAG: and
; INNERMOST-DAG: N <= 2147483647
; INNERMOST: }
; INNERMOST: Assumed Context: ; INNERMOST: Assumed Context:
; INNERMOST: [N] -> { : } ; INNERMOST: [N] -> { : }
; INNERMOST: p0: %N ; INNERMOST: p0: %N

View File

@ -9,7 +9,11 @@
; INNERMOST: Region: %bb9---%bb18 ; INNERMOST: Region: %bb9---%bb18
; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Max Loop Depth: 1
; INNERMOST: Context: ; INNERMOST: Context:
; INNERMOST: [p_0] -> { : p_0 >= -2199023255552 and p_0 <= 2199023254528 } ; INNERMOST: [p_0] -> { :
; INNERMOST-DAG: p_0 >= -2199023255552
; INNERMOST-DAG: and
; INNERMOST-DAG: p_0 <= 2199023254528
; INNERMOST: }
; INNERMOST: Assumed Context: ; INNERMOST: Assumed Context:
; INNERMOST: [p_0] -> { : } ; INNERMOST: [p_0] -> { : }
; INNERMOST: p0: {0,+,(sext i32 %N to i64)}<%bb3> ; INNERMOST: p0: {0,+,(sext i32 %N to i64)}<%bb3>

View File

@ -7,7 +7,11 @@
; CHECK: Region: %bb2---%bb24 ; CHECK: Region: %bb2---%bb24
; CHECK: Max Loop Depth: 1 ; CHECK: Max Loop Depth: 1
; CHECK: Context: ; CHECK: Context:
; CHECK: [N] -> { : N >= -2147483648 and N <= 2147483647 } ; CHECK: [N] -> { :
; CHECK-DAG: N >= -2147483648
; CHECK-DAG: and
; CHECK-DAG: N <= 2147483647
; CHECK: }
; CHECK: Assumed Context: ; CHECK: Assumed Context:
; CHECK: [N] -> { : } ; CHECK: [N] -> { : }
; CHECK: p0: %N ; CHECK: p0: %N

View File

@ -20,6 +20,8 @@
; CHECK-DAG: p <= 30 ; CHECK-DAG: p <= 30
; CHECK-DAG: and ; CHECK-DAG: and
; CHECK-DAG: m <= 20 ; CHECK-DAG: m <= 20
; CHECK-DAG: and
; CHECK-DAG: p <= 2305843009213694582 - 600n - 30m
; CHECK: } ; CHECK: }
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -11,8 +11,7 @@ entry:
bb: bb:
%indvar = phi i1024 [ 0, %entry ], [ %indvar.next, %bb ] %indvar = phi i1024 [ 0, %entry ], [ %indvar.next, %bb ]
%scevgep = getelementptr i1024, i1024* %a, i1024 %indvar store i1024 %indvar, i1024* %a, align 8
store i1024 %indvar, i1024* %scevgep, align 8
%indvar.next = add nsw i1024 %indvar, 1 %indvar.next = add nsw i1024 %indvar, 1
%exitcond = icmp eq i1024 %indvar, 123456000000000000000000000 %exitcond = icmp eq i1024 %indvar, 123456000000000000000000000
; CHECK: 'bb => return' in function 'f' ; CHECK: 'bb => return' in function 'f'

View File

@ -45,8 +45,6 @@ bb2: ; preds = %bb, %entry
ret i64 0 ret i64 0
} }
; CHECK: Context:
; CHECK: [n] -> { : }
; CHECK: Statements { ; CHECK: Statements {
; CHECK: Stmt_bb ; CHECK: Stmt_bb
; CHECK: Domain := ; CHECK: Domain :=

View File

@ -10,7 +10,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; } ; }
; CHECK: Assumed Context: ; CHECK: Assumed Context:
; CHECK: [m, p] -> { : } ; CHECK: [m, p] -> { : p <= 9223372036854775708 }
; CHECK: p0: %m ; CHECK: p0: %m
; CHECK: p1: %p ; CHECK: p1: %p
; CHECK: Statements { ; CHECK: Statements {

View File

@ -8,9 +8,21 @@
; } ; }
; ;
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 <= 1 and o0 >= 0) }; ; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2):
; CHECK-DAG: 2e0 = -i0 + o0
; CHECK-DAG: and
; CHECK-DAG: o0 <= 1
; CHECK-DAG: and
; CHECK-DAG: o0 >= 0
; CHECK: };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 <= 1 and o0 >= 0) }; ; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2):
; CHECK-DAG: 2e0 = -i0 + o0
; CHECK-DAG: and
; CHECK-DAG: o0 <= 1
; CHECK-DAG: and
; CHECK-DAG: o0 >= 0
; CHECK: };
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -24,7 +24,7 @@ bb:
br i1 %brcond, label %store, label %bb.backedge br i1 %brcond, label %store, label %bb.backedge
store: store:
%scevgep = getelementptr i64, i64* %a, i64 %i %scevgep = getelementptr inbounds i64, i64* %a, i64 %i
store i64 %i, i64* %scevgep store i64 %i, i64* %scevgep
br label %bb.backedge br label %bb.backedge

View File

@ -4,7 +4,11 @@
; range metadata (see bottom of the file) are present: ; range metadata (see bottom of the file) are present:
; ;
; CHECK: Context: ; CHECK: Context:
; CHECK: [p_0] -> { : p_0 >= 0 and p_0 <= 255 } ; CHECK: [p_0] -> { :
; CHECK-DAG: p_0 >= 0
; CHECK-DAG: and
; CHECK-DAG: p_0 <= 255
; CHECK: }
; ;
; void jd(int *A, int *p /* in [0,256) */) { ; void jd(int *A, int *p /* in [0,256) */) {
; for (int i = 0; i < 1024; i++) ; for (int i = 0; i < 1024; i++)

View File

@ -14,7 +14,7 @@ entry:
bb: ; preds = %bb, %entry bb: ; preds = %bb, %entry
%i = phi i64 [ 0, %entry ], [ %i.inc, %bb ] %i = phi i64 [ 0, %entry ], [ %i.inc, %bb ]
%scevgep = getelementptr i64, i64* %a, i64 %i %scevgep = getelementptr inbounds i64, i64* %a, i64 %i
store i64 %i, i64* %scevgep store i64 %i, i64* %scevgep
%i.inc = add nsw i64 %i, 1 %i.inc = add nsw i64 %i, 1
%exitcond = icmp eq i64 %i.inc, %N %exitcond = icmp eq i64 %i.inc, %N

View File

@ -0,0 +1,45 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Boundary Context:
; CHECK: [N] -> { : N <= 128 }
;
; #include <stdlib.h>
; #include <stdio.h>
;
; void __attribute__((noinline)) foo(float *A, long N) {
; for (long i = 0; i < N; i++)
; if ((signed char)i < 100)
; A[i] += i;
; }
; Loop over %i.0 from 0 while %i.0 < %N. The update of A[%i.0] in %bb5 is
; guarded by a comparison on the i8 truncation of the induction variable.
define void @foo(float* %A, i64 %N) {
bb:
br label %bb1
bb1: ; preds = %bb11, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%tmp = icmp slt i64 %i.0, %N
br i1 %tmp, label %bb2, label %bb13
bb2: ; preds = %bb1
; (signed char)i < 100
%tmp3 = trunc i64 %i.0 to i8
%tmp4 = icmp slt i8 %tmp3, 100
br i1 %tmp4, label %bb5, label %bb10
bb5: ; preds = %bb2
; A[i] += i
%tmp6 = sitofp i64 %i.0 to float
%tmp7 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp8 = load float, float* %tmp7, align 4
%tmp9 = fadd float %tmp8, %tmp6
store float %tmp9, float* %tmp7, align 4
br label %bb10
bb10: ; preds = %bb5, %bb2
br label %bb11
bb11: ; preds = %bb10
%tmp12 = add nuw nsw i64 %i.0, 1
br label %bb1
bb13: ; preds = %bb1
ret void
}

View File

@ -19,7 +19,7 @@ bb:
br i1 %brcond, label %store, label %bb.backedge br i1 %brcond, label %store, label %bb.backedge
store: store:
%scevgep = getelementptr i64, i64* %a, i64 %i %scevgep = getelementptr inbounds i64, i64* %a, i64 %i
store i64 %i, i64* %scevgep store i64 %i, i64* %scevgep
br label %bb.backedge br label %bb.backedge

View File

@ -0,0 +1,71 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, char N, char p) {
; for (char i = 0; i < N; i++) {
; A[i + 3] = 0;
; }
; }
;
; The wrap function has no inbounds GEP but the nowrap function has. Therefore,
; we will add the assumption that i+3 won't overflow only to the former.
;
; CHECK: Function: wrap
; CHECK: Boundary Context:
; CHECK: [N] -> { : N <= 125 }
;
;
; FIXME: This is a negative test as nowrap should not need an assumed context.
; However %tmp5 in @nowrap is translated to the SCEV <3,+,1><nw><%bb2>
; which lacks the <nsw> flags we would need to avoid runtime checks.
;
; CHECK: Function: nowrap
; CHECK: Boundary Context:
; CHECK-NOT: [N] -> { : }
;
target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128"
; i8 variant: the index computation %tmp5 carries no nsw/nuw flags and the
; GEP is not inbounds.
define void @wrap(i32* %A, i8 %N, i8 %p) {
bb:
br label %bb2
bb2: ; preds = %bb7, %bb
%indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%tmp3 = icmp slt i8 %indvars.iv, %N
br i1 %tmp3, label %bb4, label %bb8
bb4: ; preds = %bb2
; A[i + 3] = 0
%tmp5 = add i8 %indvars.iv, 3
%tmp6 = getelementptr i32, i32* %A, i8 %tmp5
store i32 0, i32* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb4
%indvars.iv.next = add nsw nuw i8 %indvars.iv, 1
br label %bb2
bb8: ; preds = %bb2
ret void
}
; i8 variant: the index computation %tmp5 is flagged nsw/nuw and the GEP is
; inbounds.
define void @nowrap(i32* %A, i8 %N, i8 %p) {
bb:
br label %bb2
bb2: ; preds = %bb7, %bb
%indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%tmp3 = icmp slt i8 %indvars.iv, %N
br i1 %tmp3, label %bb4, label %bb8
bb4: ; preds = %bb2
; A[i + 3] = 0
%tmp5 = add nsw nuw i8 %indvars.iv, 3
%tmp6 = getelementptr inbounds i32, i32* %A, i8 %tmp5
store i32 0, i32* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb4
%indvars.iv.next = add nsw nuw i8 %indvars.iv, 1
br label %bb2
bb8: ; preds = %bb2
ret void
}

View File

@ -0,0 +1,72 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; void f(long *A, long N, long p) {
; for (long i = 0; i < N; i++)
; A[i + 1] = 0;
; }
;
; The wrap function has no inbounds GEP but the nowrap function has. Therefore,
; we will add the assumption that i+1 won't overflow only to the former.
;
; Note:
; 1152921504606846975 * sizeof(long) <= 2 ^ 63 - 1
; and
; 1152921504606846976 * sizeof(long) > 2 ^ 63 - 1
; with
; sizeof(long) == 8
;
; CHECK: Function: wrap
; CHECK: Boundary Context:
; CHECK: [N] -> { : N <= 1152921504606846975 }
;
; CHECK: Function: nowrap
; CHECK: Boundary Context:
; CHECK: [N] -> { : }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; i64 variant: the index computation %tmp5 is flagged nsw/nuw but the GEP is
; not inbounds.
define void @wrap(i64* %A, i64 %N, i64 %p) {
bb:
br label %bb2
bb2: ; preds = %bb7, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%tmp3 = icmp slt i64 %indvars.iv, %N
br i1 %tmp3, label %bb4, label %bb8
bb4: ; preds = %bb2
; A[i + 1] = 0
%tmp5 = add nsw nuw i64 %indvars.iv, 1
%tmp6 = getelementptr i64, i64* %A, i64 %tmp5
store i64 0, i64* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb4
%indvars.iv.next = add nsw nuw i64 %indvars.iv, 1
br label %bb2
bb8: ; preds = %bb2
ret void
}
; i64 variant: the index computation %tmp5 is flagged nsw/nuw and the GEP is
; inbounds.
define void @nowrap(i64* %A, i64 %N, i64 %p) {
bb:
br label %bb2
bb2: ; preds = %bb7, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%tmp3 = icmp slt i64 %indvars.iv, %N
br i1 %tmp3, label %bb4, label %bb8
bb4: ; preds = %bb2
; A[i + 1] = 0
%tmp5 = add nsw nuw i64 %indvars.iv, 1
%tmp6 = getelementptr inbounds i64, i64* %A, i64 %tmp5
store i64 0, i64* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb4
%indvars.iv.next = add nsw nuw i64 %indvars.iv, 1
br label %bb2
bb8: ; preds = %bb2
ret void
}

View File

@ -0,0 +1,42 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, int N, int p) {
; for (int i = 0; i < N; i++)
; A[i + 30] = 0;
; }
;
; The wrap function has no inbounds GEP. Therefore, we will add the assumption
; that i+30 won't overflow.
;
; Note: 2147483618 + 30 == 2 ^ 31
;
; CHECK: Function: wrap
; CHECK: Context:
; CHECK: [N] -> { : N <= 2147483647 and N >= -2147483648 }
; CHECK: Boundary Context:
; CHECK: [N] -> { : N <= 2147483618 }
;
target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
; @wrap: for every i in [0, N) store 0 to A[i + 30], using a 32-bit induction
; variable. Neither the index addition nor the getelementptr carries a no-wrap
; or inbounds flag. The parameter %p is not used in the body. The FileCheck
; expectations of this file require the boundary context N <= 2147483618 for
; this function.
define void @wrap(i32* %A, i32 %N, i32 %p) {
bb:
br label %bb2
bb2: ; preds = %bb7, %bb
; Loop header: i starts at 0; the body runs while i < N (signed compare).
%indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%tmp3 = icmp slt i32 %indvars.iv, %N
br i1 %tmp3, label %bb4, label %bb8
bb4: ; preds = %bb2
; Plain add (no nsw/nuw) feeding a GEP without inbounds.
%tmp5 = add i32 %indvars.iv, 30
%tmp6 = getelementptr i32, i32* %A, i32 %tmp5
store i32 0, i32* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb4
; Loop latch: i = i + 1 (flagged nuw and nsw).
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
br label %bb2
bb8: ; preds = %bb2
ret void
}

View File

@ -0,0 +1,37 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, int N, int p) {
; for (int i = 0; i < N; i++)
; A[i + p] = 0;
; }
;
; Note: 2147483648 == 2 ^ 31
;
; CHECK: Function: wrap
; CHECK: Boundary Context:
; CHECK: [N, p] -> { : p <= 2147483648 - N }
;
target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
; @wrap: for every i in [0, N) store 0 to A[i + p], using a 32-bit induction
; variable. The index addition i + p has no nsw/nuw flag; the getelementptr is
; inbounds. The FileCheck expectations of this file require the boundary
; context p <= 2147483648 - N for this function.
define void @wrap(i32* %A, i32 %N, i32 %p) {
bb:
br label %bb2
bb2: ; preds = %bb7, %bb
; Loop header: i starts at 0; the body runs while i < N (signed compare).
%indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%tmp3 = icmp slt i32 %indvars.iv, %N
br i1 %tmp3, label %bb4, label %bb8
bb4: ; preds = %bb2
; Plain add of the induction variable and the parameter %p (no nsw/nuw).
%tmp5 = add i32 %indvars.iv, %p
%tmp6 = getelementptr inbounds i32, i32* %A, i32 %tmp5
store i32 0, i32* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb4
; Loop latch: i = i + 1 (flagged nuw and nsw).
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
br label %bb2
bb8: ; preds = %bb2
ret void
}

View File

@ -0,0 +1,37 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; void f(char *A, char N, char p) {
; for (char i = 0; i < N; i++)
; A[p-1] = 0;
; }
;
; CHECK: Function: wrap
; CHECK: Context:
; CHECK: [N, p] -> { : N <= 127 and N >= -128 and p <= 127 and p >= -128 }
; CHECK: Boundary Context:
; CHECK: [N, p] -> { : p >= -127 }
;
target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128"
; @wrap: for every i in [0, N) store 0 to A[p - 1], using an 8-bit induction
; variable. The index p - 1 does not depend on i; it is computed with a plain
; add of -1 (no nsw/nuw) and used by a getelementptr without inbounds. The
; FileCheck expectations of this file require the boundary context p >= -127
; for this function.
define void @wrap(i8* %A, i8 %N, i8 %p) {
bb:
br label %bb2
bb2: ; preds = %bb7, %bb
; Loop header: i starts at 0; the body runs while i < N (signed compare).
%indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
%tmp3 = icmp slt i8 %indvars.iv, %N
br i1 %tmp3, label %bb4, label %bb8
bb4: ; preds = %bb2
; p + (-1) in i8 without any no-wrap flag.
%tmp5 = add i8 %p, -1
%tmp6 = getelementptr i8, i8* %A, i8 %tmp5
store i8 0, i8* %tmp6, align 4
br label %bb7
bb7: ; preds = %bb4
; Loop latch: i = i + 1 (flagged nuw and nsw).
%indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
br label %bb2
bb8: ; preds = %bb2
ret void
}

View File

@ -0,0 +1,50 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; We should not generate a runtime check for ((int)r1 + (int)r2) as it is known
; not to overflow. However, (p + q) can overflow, thus checks are needed.
;
; CHECK: Boundary Context:
; CHECK: [r1, r2, q, p] -> {
; CHECK-DAG: p <= 2147483647 - q
; CHECK-DAG: and
; CHECK-DAG: p >= -2147483648 - q
; CHECK-DAG: and
; CHECK-DAG: r2 <= 127 + r1
; CHECK: }
;
; void wraps(int *A, int p, short q, char r1, char r2) {
; for (char i = r1; i < r2; i++)
; A[p + q] = A[(int)r1 + (int)r2];
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; @wraps: loop with an 8-bit induction variable that starts at r1 and runs
; while i < r2 (signed compare). Each iteration loads A[sext(r1) + sext(r2)]
; and stores the loaded value to A[sext(sext(q) + p)]. Neither address depends
; on the induction variable.
define void @wraps(i32* %A, i32 %p, i16 signext %q, i8 signext %r1, i8 signext %r2) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop header: i is initialised with %r1 and compared against %r2.
%i.0 = phi i8 [ %r1, %entry ], [ %inc, %for.inc ]
%cmp = icmp slt i8 %i.0, %r2
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Load index: both i8 parameters are sign-extended to i64 before the nsw add.
%conv3 = sext i8 %r1 to i64
%conv4 = sext i8 %r2 to i64
%add = add nsw i64 %conv3, %conv4
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
%tmp = load i32, i32* %arrayidx, align 4
; Store index: q is sign-extended to i32, added to p in i32 (nsw), and the sum
; is then sign-extended to i64.
%conv5 = sext i16 %q to i32
%add6 = add nsw i32 %conv5, %p
%idxprom7 = sext i32 %add6 to i64
%arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %idxprom7
store i32 %tmp, i32* %arrayidx8, align 4
br label %for.inc
for.inc: ; preds = %for.body
; Loop latch: the i8 increment carries no nsw/nuw flag.
%inc = add i8 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,42 @@
; RUN: opt %loadPolly -polly-scops -polly-detect-unprofitable -analyze < %s | FileCheck %s
;
; CHECK: Boundary Context:
; CHECK: [N] -> { : N <= 128 }
;
; void foo(float *A, long N) {
; for (long i = 0; i < N; i++)
; if ((signed char)i < 100)
; A[i] += i;
; }
; @foo: for every i in [0, N) (64-bit induction variable), truncate i to i8
; and, if the truncated value is less than 100 (signed compare), add
; (float)i to A[i]. The increment of i happens in the latch block bb11.
define void @foo(float* %A, i64 %N) {
bb:
br label %bb1
bb1: ; preds = %bb11, %bb
; Loop header: i starts at 0; the body runs while i < N (signed compare).
%i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%tmp = icmp slt i64 %i.0, %N
br i1 %tmp, label %bb2, label %bb13
bb2: ; preds = %bb1
; The branch condition is evaluated on the low 8 bits of i only.
%tmp3 = trunc i64 %i.0 to i8
%tmp4 = icmp slt i8 %tmp3, 100
br i1 %tmp4, label %bb5, label %bb10
bb5: ; preds = %bb2
; A[i] += (float)i, addressed with the untruncated 64-bit i.
%tmp6 = sitofp i64 %i.0 to float
%tmp7 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp8 = load float, float* %tmp7, align 4
%tmp9 = fadd float %tmp8, %tmp6
store float %tmp9, float* %tmp7, align 4
br label %bb10
bb10: ; preds = %bb5, %bb2
br label %bb11
bb11: ; preds = %bb10
; Loop latch: i = i + 1 (flagged nuw and nsw).
%tmp12 = add nuw nsw i64 %i.0, 1
br label %bb1
bb13: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,42 @@
; RUN: opt %loadPolly -polly-scops -polly-detect-unprofitable -analyze < %s | FileCheck %s
;
; CHECK: Boundary Context:
; CHECK: [N] -> { : N <= 128 }
;
; void foo(float *A, long N) {
; for (long i = 0; i < N;)
; if ((signed char)i++ < 100)
; A[i] += i;
; }
; @foo: for every i in [0, N) (64-bit induction variable), truncate i to i8
; and, if the truncated value is less than 100 (signed compare), add
; (float)i to A[i]. The next value of i (%tmp12) is computed at the top of the
; loop body bb2 rather than in a latch block; the truncation, the address and
; the added value all use %i.0, i.e. the value before the increment.
define void @foo(float* %A, i64 %N) {
bb:
br label %bb1
bb1: ; preds = %bb11, %bb
; Loop header: i starts at 0; the body runs while i < N (signed compare).
%i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
%tmp = icmp slt i64 %i.0, %N
br i1 %tmp, label %bb2, label %bb13
bb2: ; preds = %bb1
; Increment first (nuw and nsw), then test the low 8 bits of the old i.
%tmp12 = add nuw nsw i64 %i.0, 1
%tmp3 = trunc i64 %i.0 to i8
%tmp4 = icmp slt i8 %tmp3, 100
br i1 %tmp4, label %bb5, label %bb10
bb5: ; preds = %bb2
; A[i] += (float)i, addressed with the untruncated, pre-increment %i.0.
%tmp6 = sitofp i64 %i.0 to float
%tmp7 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp8 = load float, float* %tmp7, align 4
%tmp9 = fadd float %tmp8, %tmp6
store float %tmp9, float* %tmp7, align 4
br label %bb10
bb10: ; preds = %bb5, %bb2
br label %bb11
bb11: ; preds = %bb10
br label %bb1
bb13: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,81 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; This checks that the no-wrap checks will be computed fast, as some examples
; have already shown huge slowdowns even though the inbounds and nsw flags were
; all in place.
;
; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark.
; void fast(char *A, char N, char M) {
; for (char i = 0; i < 8; i++) {
; short index0 = (short)(i + N);
; #ifdef fast
; short index1 = (index0 * 1) + (short)M;
; #else
; short index1 = (index0 * 16) + (short)M;
; #endif
; A[index1]++;
; }
; }
;
; CHECK: Function: fast
; CHECK: Function: slow
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; @fast: loop over i = 0 .. 7 (exit when i == 8) that increments the byte at
; A[sext(i + N) + sext(M)]. The sum i + N is computed in i8 (nsw) and
; sign-extended to i16; M is sign-extended to i16 and added (nsw) in i16.
; Unlike @slow, the sign-extended sum is NOT multiplied by 16; the
; multiplication is kept below only as a commented-out line.
define void @fast(i8* %A, i8 %N, i8 %M) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop header: i starts at 0; the loop exits once i equals 8.
%indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i8 %indvars.iv, 8
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; index0 = (short)(i + N)
%tmp3 = add nsw i8 %indvars.iv, %N
%tmp3ext = sext i8 %tmp3 to i16
; Disabled multiplication (present in @slow):
;%mul = mul nsw i16 %tmp3ext, 16
; index1 = index0 + (short)M
%Mext = sext i8 %M to i16
%add2 = add nsw i16 %tmp3ext, %Mext
; A[index1]++ through an inbounds GEP with an i16 index.
%arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2
%tmp4 = load i8, i8* %arrayidx, align 4
%inc = add nsw i8 %tmp4, 1
store i8 %inc, i8* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
; Loop latch: i = i + 1 (flagged nuw and nsw).
%indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
; @slow: loop over i = 0 .. 7 (exit when i == 8) that increments the byte at
; A[sext(i + N) * 16 + sext(M)]. The sum i + N is computed in i8 (nsw) and
; sign-extended to i16; the multiplication by 16 and the addition of the
; sign-extended M are both done in i16 with nsw.
define void @slow(i8* %A, i8 %N, i8 %M) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop header: i starts at 0; the loop exits once i equals 8.
%indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i8 %indvars.iv, 8
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; index0 = (short)(i + N)
%tmp3 = add nsw i8 %indvars.iv, %N
%tmp3ext = sext i8 %tmp3 to i16
; index1 = index0 * 16 + (short)M, all in i16.
%mul = mul nsw i16 %tmp3ext, 16
%Mext = sext i8 %M to i16
%add2 = add nsw i16 %mul, %Mext
; A[index1]++ through an inbounds GEP with an i16 index.
%arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2
%tmp4 = load i8, i8* %arrayidx, align 4
%inc = add nsw i8 %tmp4, 1
store i8 %inc, i8* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
; Loop latch: i = i + 1 (flagged nuw and nsw).
%indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,86 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; This checks that the no-wrap checks will be computed fast, as some examples
; have already shown huge slowdowns even though the inbounds and nsw flags were
; all in place.
;
; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark.
; void fast(char *A, char N, char M) {
; for (char i = 0; i < 8; i++) {
; char index0 = i + N;
; char index1 = index0 * 16;
; char index2 = index1 + M;
; A[(short)index2]++;
; }
; }
;
; void slow(char *A, char N, char M) {
; for (char i = 0; i < 8; i++) {
; char index0 = i + N;
; char index1 = index0 * 16;
; short index2 = ((short)index1) + ((short)M);
; A[index2]++;
; }
; }
;
; CHECK: Function: fast
; CHECK: Function: slow
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; @fast: loop over i = 0 .. 7 (exit when i == 8) that increments the byte at
; A[sext((i + N) * 16 + M)]. The whole index expression is evaluated in i8
; (every operation flagged nsw) and only the final result is sign-extended to
; i16 for the GEP.
define void @fast(i8* %A, i8 %N, i8 %M) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop header: i starts at 0; the loop exits once i equals 8.
%indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i8 %indvars.iv, 8
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; index0 = i + N; index1 = index0 * 16; index2 = index1 + M (all i8, nsw).
%tmp3 = add nsw i8 %indvars.iv, %N
%mul = mul nsw i8 %tmp3, 16
%add2 = add nsw i8 %mul, %M
; Single sign extension of the finished i8 index.
%add2ext = sext i8 %add2 to i16
; A[(short)index2]++ through an inbounds GEP with an i16 index.
%arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2ext
%tmp4 = load i8, i8* %arrayidx, align 4
%inc = add nsw i8 %tmp4, 1
store i8 %inc, i8* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
; Loop latch: i = i + 1 (flagged nuw and nsw).
%indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
; @slow: loop over i = 0 .. 7 (exit when i == 8) that increments the byte at
; A[sext((i + N) * 16) + sext(M)]. The sum and the multiplication by 16 are
; evaluated in i8 (nsw); the product and M are then each sign-extended to i16
; and added (nsw) in i16.
define void @slow(i8* %A, i8 %N, i8 %M) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop header: i starts at 0; the loop exits once i equals 8.
%indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i8 %indvars.iv, 8
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; index0 = i + N; index1 = index0 * 16 (both i8, nsw).
%tmp3 = add nsw i8 %indvars.iv, %N
%mul = mul nsw i8 %tmp3, 16
; index2 = (short)index1 + (short)M, added in i16.
%mulext = sext i8 %mul to i16
%Mext = sext i8 %M to i16
%add2 = add nsw i16 %mulext, %Mext
; A[index2]++ through an inbounds GEP with an i16 index.
%arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2
%tmp4 = load i8, i8* %arrayidx, align 4
%inc = add nsw i8 %tmp4, 1
store i8 %inc, i8* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
; Loop latch: i = i + 1 (flagged nuw and nsw).
%indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}