From c1db67e218ae22ab626ba75a5f6329786ca84c63 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 29 Sep 2015 23:47:21 +0000 Subject: [PATCH] Identify and hoist definitively invariant loads As a first step in the direction of assumed invariant loads (loads that are not written in some context) we now detect and hoist definitively invariant loads. These invariant loads will be preloaded in the code generation and used in the optimized version of the SCoP. If the load is only conditionally executed the preloaded version will also only be executed under the same condition, hence we will never access memory that wouldn't have been accessed otherwise. This conditional preloading is also the feature that most distinguishes the approach from LICM. As hoisting can make statements empty we will simplify the SCoP and remove empty statements that would otherwise cause artifacts in the code generation. Differential Revision: http://reviews.llvm.org/D13194 llvm-svn: 248861 --- polly/include/polly/CodeGen/IslNodeBuilder.h | 18 +++ polly/include/polly/ScopInfo.h | 44 +++++- polly/lib/Analysis/ScopInfo.cpp | 127 ++++++++++++++++++ polly/lib/CodeGen/BlockGenerators.cpp | 11 ++ polly/lib/CodeGen/CodeGeneration.cpp | 3 +- polly/lib/CodeGen/IslNodeBuilder.cpp | 117 ++++++++++++++++ .../CodeGen/aliasing_parametric_simple_2.ll | 3 +- polly/test/Isl/CodeGen/exprModDiv.ll | 43 +++--- polly/test/Isl/CodeGen/invariant_load.ll | 39 ++++++ .../CodeGen/non-affine-phi-node-expansion.ll | 8 +- .../phi_in_exit_early_lnt_failure_4.ll | 62 --------- polly/test/Isl/CodeGen/simple_vec_call.ll | 14 +- polly/test/Isl/CodeGen/simple_vec_call_2.ll | 24 ++-- polly/test/Isl/CodeGen/simple_vec_cast.ll | 10 +- polly/test/Isl/CodeGen/simple_vec_const.ll | 7 +- .../test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll | 9 +- polly/test/Isl/CodeGen/two-scops-in-row.ll | 1 + polly/test/ScopInfo/inter_bb_scalar_dep.ll | 10 +- .../ScopInfo/intra_and_inter_bb_scalar_dep.ll | 19 ++- polly/test/ScopInfo/intra_bb_scalar_dep.ll | 14 +-
polly/test/ScopInfo/invariant_load.ll | 35 +++++ ...ariant_load_base_pointer_in_conditional.ll | 52 +++++++ polly/test/ScopInfo/tempscop-printing.ll | 19 ++- 23 files changed, 534 insertions(+), 155 deletions(-) create mode 100644 polly/test/Isl/CodeGen/invariant_load.ll delete mode 100644 polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll create mode 100644 polly/test/ScopInfo/invariant_load.ll create mode 100644 polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h index 616d5c3fc62a..2b65c3e3ec27 100644 --- a/polly/include/polly/CodeGen/IslNodeBuilder.h +++ b/polly/include/polly/CodeGen/IslNodeBuilder.h @@ -42,6 +42,9 @@ public: void addParameters(__isl_take isl_set *Context); void create(__isl_take isl_ast_node *Node); + /// @brief Preload all memory loads that are invariant. + void preloadInvariantLoads(); + /// @brief Finalize code generation for the SCoP @p S. /// /// @see BlockGenerator::finalizeSCoP(Scop &S) @@ -190,6 +193,21 @@ protected: /// @param Mark The node we generate code for. virtual void createMark(__isl_take isl_ast_node *Marker); virtual void createFor(__isl_take isl_ast_node *For); + + /// @brief Preload the memory load access @p MA. + /// + /// If @p MA is not always executed it will be conditionally loaded and + /// merged with a null value of that type.
Hence, if @p MA is executed only + /// under condition C then the preload code will look like this: + /// + /// MA_preload = null; + /// if (C) + /// MA_preload = load MA; + /// use MA_preload + Value *preloadInvariantLoad(const MemoryAccess &MA, + __isl_take isl_set *Domain, + __isl_keep isl_ast_build *Build); + void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index aa86d81e1fc3..24bb9c5b1f11 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -127,6 +127,9 @@ public: /// @brief Destructor to free the isl id of the base pointer. ~ScopArrayInfo(); + /// @brief Set the base pointer to @p BP. + void setBasePtr(Value *BP) { BasePtr = BP; } + /// @brief Return the base pointer. Value *getBasePtr() const { return BasePtr; } @@ -690,6 +693,15 @@ public: llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MemoryAccess::ReductionType RT); +/// @brief Ordered list type to hold accesses. +using MemoryAccessList = std::forward_list; + +/// @brief Type for invariant memory accesses and their domain context. +using InvariantAccessTy = std::pair; + +/// @brief Type for multiple invariant memory accesses and their domain context. +using InvariantAccessesTy = SmallVector; + ///===----------------------------------------------------------------------===// /// @brief Statement of the Scop /// @@ -700,9 +712,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, /// At the moment every statement represents a single basic block of LLVM-IR. class ScopStmt { public: - /// @brief List to hold all (scalar) memory accesses mapped to an instruction. - using MemoryAccessList = std::forward_list; - ScopStmt(const ScopStmt &) = delete; const ScopStmt &operator=(const ScopStmt &) = delete; @@ -880,6 +889,9 @@ public: /// @brief Return true if this statement represents a whole region.
bool isRegionStmt() const { return R != nullptr; } + /// @brief Return true if this statement does not contain any accesses. + bool isEmpty() const { return MemAccs.empty(); } + /// @brief Return the (scalar) memory accesses for @p Inst. const MemoryAccessList &getAccessesFor(const Instruction *Inst) const { MemoryAccessList *MAL = lookupAccessesFor(Inst); @@ -913,6 +925,13 @@ public: BB = Block; } + /// @brief Move the memory access in @p InvMAs to @p TargetList. + /// + /// Note that scalar accesses that are caused by any access in @p InvMAs will + /// be eliminated too. + void hoistMemoryAccesses(MemoryAccessList &InvMAs, + InvariantAccessesTy &TargetList); + typedef MemoryAccessVec::iterator iterator; typedef MemoryAccessVec::const_iterator const_iterator; @@ -1023,7 +1042,7 @@ private: /// Max loop depth. unsigned MaxLoopDepth; - typedef std::deque StmtSet; + typedef std::list StmtSet; /// The statements in this Scop. StmtSet Stmts; @@ -1130,6 +1149,9 @@ private: /// group to ensure the SCoP is executed in an alias free environment. MinMaxVectorPairVectorTy MinMaxAliasGroups; + /// @brief List of invariant accesses. + InvariantAccessesTy InvariantAccesses; + /// @brief Scop constructor; invoked from ScopInfo::buildScop. Scop(Region &R, AccFuncMapType &AccFuncMap, ScalarEvolution &SE, DominatorTree &DT, isl_ctx *ctx, unsigned MaxLoopDepth); @@ -1183,6 +1205,15 @@ private: /// @brief Add parameter constraints to @p C that imply a non-empty domain. __isl_give isl_set *addNonEmptyDomainConstraints(__isl_take isl_set *C) const; + /// @brief Simplify the SCoP representation + /// + /// At the moment we perform the following simplifications: + /// - removal of empty statements (due to invariant load hoisting) + void simplifySCoP(); + + /// @brief Hoist all invariant memory loads. + void hoistInvariantLoads(); + /// @brief Build the Context of the Scop. void buildContext(); @@ -1313,6 +1344,11 @@ public: /// @return The maximum depth of the loop. 
inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; } + /// @brief Return the set of invariant accesses. + const InvariantAccessesTy &getInvariantAccesses() const { + return InvariantAccesses; + } + /// @brief Mark the SCoP as optimized by the scheduler. void markAsOptimized() { IsOptimized = true; } diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 232c03b077cc..6f3b8520bf9c 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -1350,6 +1350,46 @@ void ScopStmt::print(raw_ostream &OS) const { void ScopStmt::dump() const { print(dbgs()); } +void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs, + InvariantAccessesTy &TargetList) { + + // Remove all memory accesses in @p InvMAs from this statement together + // with all scalar accesses that were caused by them. The tricky iteration + // order used is needed because the MemAccs is a vector and the order in + // which the accesses of each memory access list (MAL) are stored in this + // vector is reversed. + for (MemoryAccess *MA : InvMAs) { + auto &MAL = *lookupAccessesFor(MA->getAccessInstruction()); + MAL.reverse(); + + auto MALIt = MAL.begin(); + auto MALEnd = MAL.end(); + auto MemAccsIt = MemAccs.begin(); + while (MALIt != MALEnd) { + while (*MemAccsIt != *MALIt) + MemAccsIt++; + + MALIt++; + MemAccs.erase(MemAccsIt); + } + + InstructionToAccess.erase(MA->getAccessInstruction()); + delete &MAL; + } + + // Get the context under which this statement, hence the memory accesses, are + // executed.
+ isl_set *DomainCtx = isl_set_params(getDomain()); + DomainCtx = isl_set_remove_redundancies(DomainCtx); + DomainCtx = isl_set_detect_equalities(DomainCtx); + DomainCtx = isl_set_coalesce(DomainCtx); + + for (MemoryAccess *MA : InvMAs) + TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx))); + + isl_set_free(DomainCtx); +} + //===----------------------------------------------------------------------===// /// Scop class implement @@ -2268,6 +2308,9 @@ void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) { buildBoundaryContext(); simplifyContexts(); buildAliasChecks(AA); + + hoistInvariantLoads(); + simplifySCoP(); } Scop::~Scop() { @@ -2290,6 +2333,9 @@ Scop::~Scop() { isl_pw_multi_aff_free(MMA.second); } } + + for (const auto &IA : InvariantAccesses) + isl_set_free(IA.second); } void Scop::updateAccessDimensionality() { @@ -2298,6 +2344,81 @@ void Scop::updateAccessDimensionality() { Access->updateDimensionality(); } +void Scop::simplifySCoP() { + + for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) { + ScopStmt &Stmt = *StmtIt; + + if (!StmtIt->isEmpty()) { + StmtIt++; + continue; + } + + if (Stmt.isRegionStmt()) + for (BasicBlock *BB : Stmt.getRegion()->blocks()) + StmtMap.erase(BB); + else + StmtMap.erase(Stmt.getBasicBlock()); + + StmtIt = Stmts.erase(StmtIt); + } +} + +void Scop::hoistInvariantLoads() { + isl_union_map *Writes = getWrites(); + for (ScopStmt &Stmt : *this) { + + // TODO: Loads that are not loop carried, hence are in a statement with + // zero iterators, are by construction invariant, though we + // currently "hoist" them anyway. 
+ + isl_set *Domain = Stmt.getDomain(); + MemoryAccessList InvMAs; + + for (MemoryAccess *MA : Stmt) { + if (MA->isImplicit() || MA->isWrite() || !MA->isAffine()) + continue; + + isl_map *AccessRelation = MA->getAccessRelation(); + if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0, + Stmt.getNumIterators())) { + isl_map_free(AccessRelation); + continue; + } + + AccessRelation = + isl_map_intersect_domain(AccessRelation, isl_set_copy(Domain)); + isl_set *AccessRange = isl_map_range(AccessRelation); + + isl_union_map *Written = isl_union_map_intersect_range( + isl_union_map_copy(Writes), isl_union_set_from_set(AccessRange)); + bool IsWritten = !isl_union_map_is_empty(Written); + isl_union_map_free(Written); + + if (IsWritten) + continue; + + InvMAs.push_front(MA); + } + + // We inserted invariant accesses always in the front but need them to be + // sorted in a "natural order". The statements are already sorted in reverse + // post order and that suffices for the accesses too. The reason we require + // an order in the first place is the dependences between invariant loads + // that can be caused by indirect loads. + InvMAs.reverse(); + + // Transfer the memory access from the statement to the SCoP. 
+ Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses); + + isl_set_free(Domain); + } + isl_union_map_free(Writes); + + if (!InvariantAccesses.empty()) + IsOptimized = true; +} + const ScopArrayInfo * Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType, ArrayRef Sizes, bool IsPHI) { @@ -2478,6 +2599,12 @@ void Scop::print(raw_ostream &OS) const { << "\n"; OS.indent(4) << "Region: " << getNameStr() << "\n"; OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n"; + OS.indent(4) << "Invariant Accesses: {\n"; + for (const auto &IA : InvariantAccesses) { + IA.first->print(OS); + OS.indent(12) << "Execution Context: " << IA.second << "\n"; + } + OS.indent(4) << "}\n"; printContext(OS.indent(4)); printArrayInfo(OS.indent(4)); printAliasAssumptions(OS); diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index bae65907a41a..dc6c519ee18a 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -108,6 +108,8 @@ Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old, return const_cast(Old); if (Value *New = GlobalMap.lookup(Old)) { + if (Value *NewRemapped = GlobalMap.lookup(New)) + New = NewRemapped; if (Old->getType()->getScalarSizeInBits() < New->getType()->getScalarSizeInBits()) New = Builder.CreateTruncOrBitCast(New, Old->getType()); @@ -226,6 +228,9 @@ Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { + if (Value *PreloadLoad = GlobalMap.lookup(Load)) + return PreloadLoad; + const Value *Pointer = Load->getPointerOperand(); Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, LTS, NewAccesses); @@ -762,6 +767,12 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad( void VectorBlockGenerator::generateLoad( ScopStmt &Stmt, const LoadInst *Load, ValueMapT &VectorMap, 
VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { + if (Value *PreloadLoad = GlobalMap.lookup(Load)) { + VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad, + Load->getName() + "_p"); + return; + } + if (!VectorType::isValidElementType(Load->getType())) { for (int i = 0; i < getVectorWidth(); i++) ScalarMaps[i][Load] = diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index 96d09387565c..53aac25a3db6 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -146,8 +146,9 @@ public: auto SplitBlock = StartBlock->getSinglePredecessor(); Builder.SetInsertPoint(SplitBlock->getTerminator()); NodeBuilder.addParameters(S.getContext()); + NodeBuilder.preloadInvariantLoads(); Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); - SplitBlock->getTerminator()->setOperand(0, RTC); + Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); Builder.SetInsertPoint(StartBlock->begin()); NodeBuilder.create(AstRoot); diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 1574e6167f09..83121eed0a07 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -814,6 +814,123 @@ void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { llvm_unreachable("Unknown isl_ast_node type"); } +/// @brief Create the actual preload memory access for @p MA. 
+static inline Value *createPreloadLoad(Scop &S, const MemoryAccess &MA, + isl_ast_build *Build, + IslExprBuilder &ExprBuilder) { + isl_set *AccessRange = isl_map_range(MA.getAccessRelation()); + isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); + PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext()); + isl_ast_expr *Access = + isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); + return ExprBuilder.create(Access); +} + +Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, + isl_set *Domain, + isl_ast_build *Build) { + + isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); + bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); + isl_set_free(Universe); + + if (AlwaysExecuted) { + isl_set_free(Domain); + return createPreloadLoad(S, MA, Build, ExprBuilder); + } else { + + isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); + + Value *Cond = ExprBuilder.create(DomainCond); + if (!Cond->getType()->isIntegerTy(1)) + Cond = Builder.CreateIsNotNull(Cond); + + BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), + Builder.GetInsertPoint(), &DT, &LI); + CondBB->setName("polly.preload.cond"); + + BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), &DT, &LI); + MergeBB->setName("polly.preload.merge"); + + Function *F = Builder.GetInsertBlock()->getParent(); + LLVMContext &Context = F->getContext(); + BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); + + DT.addNewBlock(ExecBB, CondBB); + if (Loop *L = LI.getLoopFor(CondBB)) + L->addBasicBlockToLoop(ExecBB, LI); + + auto *CondBBTerminator = CondBB->getTerminator(); + Builder.SetInsertPoint(CondBBTerminator); + Builder.CreateCondBr(Cond, ExecBB, MergeBB); + CondBBTerminator->eraseFromParent(); + + Builder.SetInsertPoint(ExecBB); + Builder.CreateBr(MergeBB); + + Builder.SetInsertPoint(ExecBB->getTerminator()); + Instruction *AccInst = MA.getAccessInstruction(); + Type *AccInstTy = AccInst->getType(); + Value 
*PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder); + + Builder.SetInsertPoint(MergeBB->getTerminator()); + auto *MergePHI = Builder.CreatePHI( + AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); + MergePHI->addIncoming(PreAccInst, ExecBB); + MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); + + return MergePHI; + } +} + +void IslNodeBuilder::preloadInvariantLoads() { + + const auto &InvAccList = S.getInvariantAccesses(); + if (InvAccList.empty()) + return; + + const Region &R = S.getRegion(); + + BasicBlock *PreLoadBB = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); + PreLoadBB->setName("polly.preload.begin"); + Builder.SetInsertPoint(PreLoadBB->begin()); + + isl_ast_build *Build = + isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); + + for (const auto &IA : InvAccList) { + MemoryAccess *MA = IA.first; + assert(!MA->isImplicit()); + + isl_set *Domain = isl_set_copy(IA.second); + Instruction *AccInst = MA->getAccessInstruction(); + Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build); + ValueMap[AccInst] = PreloadVal; + + if (SE.isSCEVable(AccInst->getType())) { + isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)); + if (ParamId) + IDToValue[ParamId] = PreloadVal; + isl_id_free(ParamId); + } + + SmallVector Users; + for (auto *U : AccInst->users()) + if (Instruction *UI = dyn_cast(U)) + if (!R.contains(UI)) + Users.push_back(UI); + for (auto *U : Users) + U->replaceUsesOfWith(AccInst, PreloadVal); + + auto *SAI = S.getScopArrayInfo(MA->getBaseAddr()); + for (auto *DerivedSAI : SAI->getDerivedSAIs()) + DerivedSAI->setBasePtr(PreloadVal); + } + + isl_ast_build_free(Build); +} + void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { for (unsigned i = 0; i < isl_set_dim(Context, isl_dim_param); ++i) { diff --git a/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll b/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll index ad956e6df434..696145998438 100644 
--- a/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ b/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -6,6 +6,7 @@ ; } ; ; CHECK: sext i32 %c to i64 +; CHECK: sext i32 %c to i64 ; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 ; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15 ; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64 @@ -23,7 +24,7 @@ ; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] -; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %1, %[[NoAlias]] +; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %3, %[[NoAlias]] ; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/exprModDiv.ll b/polly/test/Isl/CodeGen/exprModDiv.ll index 42d555db2e39..ce59ad8fdf53 100644 --- a/polly/test/Isl/CodeGen/exprModDiv.ll +++ b/polly/test/Isl/CodeGen/exprModDiv.ll @@ -6,7 +6,7 @@ ; ; void exprModDiv(float *A, float *B, float *C, long N, long p) { ; for (long i = 0; i < N; i++) -; C[i] += A[i] + B[i] + A[p] + B[p]; +; C[i] += A[i] + B[i] + A[i] + B[i + p]; ; } ; ; @@ -32,21 +32,21 @@ ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d ; A[p + 127 * floord(-p - 1, 127) + 127] -; CHECK: %20 = sub nsw i64 0, %p -; CHECK: %21 = sub nsw i64 %20, 1 -; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127 +; CHECK: %17 = sub nsw i64 0, %p +; CHECK: %18 = sub nsw i64 %17, 1 +; CHECK: %pexp.fdiv_q.0 = sub i64 %18, 127 ; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 -; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0 -; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21 +; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %18, 0 +; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %18 ; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 -; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4 -; CHECK: %23 = add nsw i64 %p, %22 -; CHECK: %24 = add nsw i64 %23, 127 -; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24 +; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4 +; CHECK: %20 = add nsw i64 %p, %19 +; CHECK: %21 = add nsw i64 %20, 127 +; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21 ; A[p / 127] ; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 @@ -58,17 +58,17 @@ ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d ; A[p + 128 * floord(-p - 1, 128) + 128] -; POW2: %20 = sub nsw i64 0, %p -; POW2: %21 = sub nsw i64 %20, 1 -; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7 -; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr -; POW2: %23 = add nsw i64 %p, %22 -; POW2: %24 = add nsw i64 %23, 128 -; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24 +; POW2: %17 = sub nsw i64 0, %p +; POW2: %18 = sub nsw i64 %17, 1 +; POW2: %polly.fdiv_q.shr = ashr i64 %18, 7 +; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr +; POW2: %20 = add nsw i64 %p, %19 +; POW2: %21 = add nsw i64 %20, 128 +; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21 ; A[p / 128] ; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -87,10 +87,11 @@ for.body: ; preds = %for.cond %arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0 %tmp1 = load float, float* %arrayidx1, align 4 %add = fadd float %tmp, %tmp1 - %arrayidx2 = getelementptr inbounds float, float* %A, i64 %p + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0 %tmp2 = load float, float* %arrayidx2, align 4 %add3 = fadd float %add, %tmp2 - %arrayidx4 = getelementptr inbounds float, float* %B, i64 %p + %padd = add nsw i64 %p, %i.0 + %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd %tmp3 = load float, float* %arrayidx4, align 4 %add5 = fadd float %add3, %tmp3 %arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0 diff --git a/polly/test/Isl/CodeGen/invariant_load.ll b/polly/test/Isl/CodeGen/invariant_load.ll new file mode 100644 index 000000000000..0e03614f430b --- /dev/null +++ b/polly/test/Isl/CodeGen/invariant_load.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s +; +; CHECK-LABEL: 
polly.preload.begin: +; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B +; +; CHECK-LABEL: polly.stmt.bb2: +; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK-NEXT: store i32 %polly.access.B.load, i32* %scevgep, align 4 +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} diff --git a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll index e880a3ecf98e..49d5e82e333b 100644 --- a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll +++ b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll @@ -4,6 +4,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %struct.wombat = type {[4 x i32]} +; CHECK: polly.preload.begin: +; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B +; CHECK-NOT: %polly.access.B.load = load i32, i32* %polly.access.B + ; CHECK: polly.stmt.bb3.entry: ; preds = %polly.start ; CHECK: br label %polly.stmt.bb3 @@ -14,8 +19,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: br label %polly.stmt.bb13.exit ; CHECK: polly.stmt.bb5: ; preds = %polly.stmt.bb3 -; CHECK: %tmp7_p_scalar_ = load i32, i32* %B, 
!alias.scope !0, !noalias !2 -; CHECK: store i32 %tmp7_p_scalar_, i32* %polly.access.cast.arg1, !alias.scope !3, !noalias !4 +; CHECK: store i32 %polly.access.B.load, i32* %polly.access.cast.arg2 ; CHECK: br label %polly.stmt.bb13.exit ; Function Attrs: nounwind uwtable diff --git a/polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll b/polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll deleted file mode 100644 index 0e9d2a70af71..000000000000 --- a/polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: opt %loadPolly -disable-basicaa -polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s -; -; This caused an lnt crash at some point, just verify it will run through and -; produce the PHI node in the exit we are looking for. -; -; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %.merge = phi %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* [ %.final_reload, %polly.stmt.for.end.298 ], [ %13, %for.end.298 ] -; -%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823*, 
%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 } -%struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823 = type { i32, i32, [100 x %struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*], i32, float, float, float } -%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822 = type { i32, i32, i32, i32, i32, i32, %struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818*, 
%struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820*, %struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] } -%struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818 = type { %struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816*, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 } -%struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816 = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 } -%struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 } -%struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820 = type { [3 x [11 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [9 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [10 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [6 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819] } -%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819 = type { i16, i8, i64 } -%struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821 = type { [2 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [4 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]] } -%struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824 = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } -%struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825 = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825* } - -@img = external global %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, align 8 - -; Function Attrs: nounwind uwtable -define void @intrapred_luma() #0 { -entry: - %PredPel = alloca [13 x i16], align 16 - br label %for.body - -for.body: ; preds = %for.body, %entry - br i1 undef, label %for.body, label %for.body.262 - -for.body.262: ; preds = %for.body - %0 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 - br label %for.body.280 - -for.body.280: ; preds = %for.body.280, %for.body.262 - %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ] - %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1 - %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66 - %1 = load i16, i16* %arrayidx283, align 2 - %arrayidx289 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, 
%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66 - store i16 %1, i16* %arrayidx289, align 2 - %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1 - br i1 false, label %for.body.280, label %for.end.298 - -for.end.298: ; preds = %for.body.280 - %2 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 - br label %for.body.310 - -for.body.310: ; preds = %for.body.310, %for.end.298 - %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ] - %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 9 - %arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv - %3 = load i16, i16* %arrayidx313, align 2 - %arrayidx322 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1 - store i16 %3, i16* %arrayidx322, align 2 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br i1 false, label %for.body.310, label %for.end.328 - -for.end.328: ; preds = %for.body.310 - ret void -} diff --git a/polly/test/Isl/CodeGen/simple_vec_call.ll b/polly/test/Isl/CodeGen/simple_vec_call.ll index a7b5d6bdbcf5..721f3c31abe2 100644 --- a/polly/test/Isl/CodeGen/simple_vec_call.ll +++ 
b/polly/test/Isl/CodeGen/simple_vec_call.ll @@ -24,16 +24,10 @@ return: ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0 -; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) [[NUW]] +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] ; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0 ; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1 ; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2 diff --git a/polly/test/Isl/CodeGen/simple_vec_call_2.ll b/polly/test/Isl/CodeGen/simple_vec_call_2.ll index 0b0a5128df22..d87a96e09960 100644 --- a/polly/test/Isl/CodeGen/simple_vec_call_2.ll +++ b/polly/test/Isl/CodeGen/simple_vec_call_2.ll @@ -24,19 +24,13 @@ return: ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement <4 x float> 
%value_p_splat, i32 0 -; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) [[NUW]] -; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0 -; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1 -; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2 -; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3 -; CHECK: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0 +; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1 +; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2 +; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3 +; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/polly/test/Isl/CodeGen/simple_vec_cast.ll b/polly/test/Isl/CodeGen/simple_vec_cast.ll index e7501a1b83b2..a1f7e32b4356 100644 --- a/polly/test/Isl/CodeGen/simple_vec_cast.ll +++ b/polly/test/Isl/CodeGen/simple_vec_cast.ll @@ -28,8 +28,10 @@ bb4: ; preds = %bb1 ret void } -; CHECK: 
%tmp_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8, !alias.scope !0, !noalias !2 -; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double> -; CHECK: store <4 x double> %0, <4 x double>* bitcast ([1024 x double]* @B to <4 x double>*), align 8, !alias.scope !3, !noalias !4 +; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) +; CHECK: polly.stmt.bb2: ; preds = %polly.start +; CHECK: %tmp_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 +; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: %0 = fpext <4 x float> %tmp_p.splat to <4 x double> +; CHECK: store <4 x double> %0, <4 x double>* diff --git a/polly/test/Isl/CodeGen/simple_vec_const.ll b/polly/test/Isl/CodeGen/simple_vec_const.ll index b59b829dc0b3..e99303df6ced 100644 --- a/polly/test/Isl/CodeGen/simple_vec_const.ll +++ b/polly/test/Isl/CodeGen/simple_vec_const.ll @@ -52,5 +52,8 @@ define i32 @main() nounwind { } -; CHECK: load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*) -; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer +; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) + +; CHECK: polly.stmt.: ; preds = %polly.start +; CHECK: %_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 +; CHECK: %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer diff --git a/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll b/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll index 343c017f40b9..fe558527e552 100644 --- a/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll +++ b/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll 
@@ -22,6 +22,9 @@ body: return: ret void } -; CHECK: %value_p_splat_one = load <1 x float**>, <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8 +; CHECK: %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0) + +; CHECK-NOT: load <1 x float**> +; CHECK: %value_p.splatinsert = insertelement <4 x float**> undef, float** %.load, i32 0 +; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> undef, <4 x i32> zeroinitializer +; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8 diff --git a/polly/test/Isl/CodeGen/two-scops-in-row.ll b/polly/test/Isl/CodeGen/two-scops-in-row.ll index 4fda7d40978f..c2552bf28a10 100644 --- a/polly/test/Isl/CodeGen/two-scops-in-row.ll +++ b/polly/test/Isl/CodeGen/two-scops-in-row.ll @@ -21,6 +21,7 @@ entry: for.0: %Scalar0.val = load i32, i32* %Scalar0 + store i32 1, i32* %Scalar0 br i1 false, label %for.0, label %for.1.preheader for.1.preheader: diff --git a/polly/test/ScopInfo/inter_bb_scalar_dep.ll b/polly/test/ScopInfo/inter_bb_scalar_dep.ll index f8d91fba5c47..d7a35d598a52 100644 --- a/polly/test/ScopInfo/inter_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/inter_bb_scalar_dep.ll @@ -14,6 +14,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; Function Attrs: nounwind +; CHECK: Invariant +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] + define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -25,11 +29,7 @@ for.i: ; preds = 
%for.i.end, %entry entry.next: ; preds = %for.i %init = load i64, i64* %init_ptr -; CHECK-LABEL: Stmt_entry_next -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; +; CHECK-NOT: Stmt_entry_next br label %for.j for.j: ; preds = %for.j, %entry.next diff --git a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll index 3766e17d136d..f2ac3db81830 100644 --- a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -14,7 +14,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -; Function Attrs: nounwind +; CHECK: Invariant Accesses: { +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: MemRef_init_ptr[0] +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: MemRef_init_ptr[0] +; CHECK: } define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -26,23 +31,17 @@ for.i: ; preds = %for.i.end, %entry entry.next: ; preds = %for.i %init = load i64, i64* %init_ptr -; CHECK-LABEL: Stmt_entry_next -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; +; CHECK-NOT: Stmt_entry_next br label %for.j for.j: ; preds = %for.j, %entry.next %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] %init_2 = load i64, i64* %init_ptr %init_sum = add i64 %init, %init_2 -; CHECK-LABEL: Stmt_for_j +; CHECK: Stmt_for_j ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [N] -> { 
Stmt_for_j[i0, i1] -> MemRef_init[] }; -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; %scevgep = getelementptr i64, i64* %A, i64 %indvar.j store i64 %init_sum, i64* %scevgep diff --git a/polly/test/ScopInfo/intra_bb_scalar_dep.ll b/polly/test/ScopInfo/intra_bb_scalar_dep.ll index 5476077511ff..446a8a2714d2 100644 --- a/polly/test/ScopInfo/intra_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/intra_bb_scalar_dep.ll @@ -14,6 +14,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; Function Attrs: nounwind +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -32,11 +35,12 @@ for.j: ; preds = %for.j, %entry.next %init_plus_two = add i64 %init, 2 %scevgep = getelementptr i64, i64* %A, i64 %indvar.j store i64 %init_plus_two, i64* %scevgep -; CHECK-LABEL: Stmt_for_j -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_j +; CHECK-NOT: ReadAccess +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; +; CHECK-NEXT: } %indvar.j.next = add nsw i64 %indvar.j, 1 %exitcond.j = icmp eq i64 %indvar.j.next, %N br i1 %exitcond.j, label %for.i.end, label %for.j diff --git 
a/polly/test/ScopInfo/invariant_load.ll b/polly/test/ScopInfo/invariant_load.ll new file mode 100644 index 000000000000..3ad9dd8486ff --- /dev/null +++ b/polly/test/ScopInfo/invariant_load.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] }; +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} diff --git a/polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll b/polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll new file mode 100644 index 000000000000..9d31c28bb90c --- /dev/null +++ b/polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BP[0] }; +; CHECK-NEXT: Execution Context: [N] -> { : N >= 514 } +; +; void f(int *BP, int *A, int N) { +; for (int i = 0; i < N; i++) +; if (i > 512) +; A[i] = *BP; +; else +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define 
void @f(i32* %BP, i32* %A, i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %tmp2 = icmp slt i64 %indvars.iv, %tmp + br i1 %tmp2, label %bb3, label %bb12 + +bb3: ; preds = %bb1 + %tmp4 = icmp sgt i64 %indvars.iv, 512 + br i1 %tmp4, label %bb5, label %bb8 + +bb5: ; preds = %bb3 + %tmp9a = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %inv = load i32, i32 *%BP + store i32 %inv, i32* %tmp9a, align 4 + br label %bb10 + +bb8: ; preds = %bb3 + %tmp9b = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp9b, align 4 + br label %bb10 + +bb10: ; preds = %bb8, %bb5 + br label %bb11 + +bb11: ; preds = %bb10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} diff --git a/polly/test/ScopInfo/tempscop-printing.ll b/polly/test/ScopInfo/tempscop-printing.ll index 52027fef257b..c537a2910761 100644 --- a/polly/test/ScopInfo/tempscop-printing.ll +++ b/polly/test/ScopInfo/tempscop-printing.ll @@ -14,6 +14,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; CHECK-LABEL: Function: f +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] +; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 } + define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { entry: br label %for.i @@ -24,12 +28,8 @@ for.i: br label %entry.next entry.next: -; CHECK: Stmt_entry_next +; CHECK-NOT: Stmt_entry_next %init = load i64, i64* %init_ptr -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; br label %for.j for.j: @@ -55,6 +55,9 @@ return: } ; 
CHECK-LABEL: Function: g +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] +; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 } define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { entry: br label %for.i @@ -65,12 +68,8 @@ for.i: br label %entry.next entry.next: -; CHECK: Stmt_entry_next +; CHECK-NOT: Stmt_entry_next %init = load i64, i64* %init_ptr -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; br label %for.j for.j: