Identify and hoist definitively invariant loads

As a first step in the direction of assumed invariant loads (loads
  that are not written in some context) we now detect and hoist
  definitively invariant loads. These invariant loads will be preloaded
  in the code generation and used in the optimized version of the SCoP.
  If the load is only conditionally executed, the preloaded version will
  also only be executed under the same condition; hence, we will never
  access memory that wouldn't have been accessed otherwise. This is also
  the main feature that distinguishes this approach from LICM.

  As hoisting can make statements empty we will simplify the SCoP and
  remove empty statements that would otherwise cause artifacts in the
  code generation.

Differential Revision: http://reviews.llvm.org/D13194

llvm-svn: 248861
This commit is contained in:
Johannes Doerfert 2015-09-29 23:47:21 +00:00
parent f6343d74ef
commit c1db67e218
23 changed files with 534 additions and 155 deletions

View File

@ -42,6 +42,9 @@ public:
void addParameters(__isl_take isl_set *Context);
void create(__isl_take isl_ast_node *Node);
/// @brief Preload all memory loads that are invariant.
void preloadInvariantLoads();
/// @brief Finalize code generation for the SCoP @p S.
///
/// @see BlockGenerator::finalizeSCoP(Scop &S)
@ -190,6 +193,21 @@ protected:
/// @param Mark The node we generate code for.
virtual void createMark(__isl_take isl_ast_node *Marker);
virtual void createFor(__isl_take isl_ast_node *For);
/// @brief Preload the memory load access @p MA.
///
/// If @p MA is not always executed it will be conditionally loaded and
/// merged with a null (zero) constant of the same type. Hence, if @p MA is
/// executed only under condition C then the preload code will look like this:
///
/// MA_preload = null;
/// if (C)
/// MA_preload = load MA;
/// use MA_preload
///
/// @param MA     The invariant load access to preload.
/// @param Domain The parameter context under which @p MA is executed; if it
///               is the universe set the load is emitted unconditionally.
/// @param Build  The isl AST build used to create the access expression and,
///               if needed, the condition expression.
///
/// @return The preloaded value: the load itself if @p MA is always executed,
///         otherwise the PHI node that merges the load with the null constant.
Value *preloadInvariantLoad(const MemoryAccess &MA,
__isl_take isl_set *Domain,
__isl_keep isl_ast_build *Build);
void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
void createForSequential(__isl_take isl_ast_node *For);

View File

@ -127,6 +127,9 @@ public:
/// @brief Destructor to free the isl id of the base pointer.
~ScopArrayInfo();
/// @brief Set the base pointer to @p BP.
void setBasePtr(Value *BP) { BasePtr = BP; }
/// @brief Return the base pointer.
Value *getBasePtr() const { return BasePtr; }
@ -690,6 +693,15 @@ public:
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
MemoryAccess::ReductionType RT);
/// @brief Ordered list type to hold accesses.
using MemoryAccessList = std::forward_list<MemoryAccess *>;
/// @brief Type for invariant memory accesses and their domain context.
using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>;
/// @brief Type for multiple invariant memory accesses and their domain context.
using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>;
///===----------------------------------------------------------------------===//
/// @brief Statement of the Scop
///
@ -700,9 +712,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
/// At the moment every statement represents a single basic block of LLVM-IR.
class ScopStmt {
public:
/// @brief List to hold all (scalar) memory accesses mapped to an instruction.
using MemoryAccessList = std::forward_list<MemoryAccess *>;
ScopStmt(const ScopStmt &) = delete;
const ScopStmt &operator=(const ScopStmt &) = delete;
@ -880,6 +889,9 @@ public:
/// @brief Return true if this statement represents a whole region.
bool isRegionStmt() const { return R != nullptr; }
/// @brief Return true if this statement does not contain any accesses.
bool isEmpty() const { return MemAccs.empty(); }
/// @brief Return the (scalar) memory accesses for @p Inst.
const MemoryAccessList &getAccessesFor(const Instruction *Inst) const {
MemoryAccessList *MAL = lookupAccessesFor(Inst);
@ -913,6 +925,13 @@ public:
BB = Block;
}
/// @brief Move the memory accesses in @p InvMAs to @p TargetList.
///
/// Note that scalar accesses that are caused by any access in @p InvMAs will
/// be eliminated too.
///
/// @param InvMAs     The accesses to remove from this statement.
/// @param TargetList The list the accesses are appended to, each paired with
///                   the parameter context of this statement's domain.
void hoistMemoryAccesses(MemoryAccessList &InvMAs,
InvariantAccessesTy &TargetList);
typedef MemoryAccessVec::iterator iterator;
typedef MemoryAccessVec::const_iterator const_iterator;
@ -1023,7 +1042,7 @@ private:
/// Max loop depth.
unsigned MaxLoopDepth;
typedef std::deque<ScopStmt> StmtSet;
typedef std::list<ScopStmt> StmtSet;
/// The statements in this Scop.
StmtSet Stmts;
@ -1130,6 +1149,9 @@ private:
/// group to ensure the SCoP is executed in an alias free environment.
MinMaxVectorPairVectorTy MinMaxAliasGroups;
/// @brief List of invariant accesses.
InvariantAccessesTy InvariantAccesses;
/// @brief Scop constructor; invoked from ScopInfo::buildScop.
Scop(Region &R, AccFuncMapType &AccFuncMap, ScalarEvolution &SE,
DominatorTree &DT, isl_ctx *ctx, unsigned MaxLoopDepth);
@ -1183,6 +1205,15 @@ private:
/// @brief Add parameter constraints to @p C that imply a non-empty domain.
__isl_give isl_set *addNonEmptyDomainConstraints(__isl_take isl_set *C) const;
/// @brief Simplify the SCoP representation
///
/// At the moment we perform the following simplifications:
/// - removal of empty statements (due to invariant load hoisting)
void simplifySCoP();
/// @brief Hoist all invariant memory loads.
void hoistInvariantLoads();
/// @brief Build the Context of the Scop.
void buildContext();
@ -1313,6 +1344,11 @@ public:
/// @return The maximum depth of the loop.
inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; }
/// @brief Return the set of invariant accesses.
const InvariantAccessesTy &getInvariantAccesses() const {
return InvariantAccesses;
}
/// @brief Mark the SCoP as optimized by the scheduler.
void markAsOptimized() { IsOptimized = true; }

View File

@ -1350,6 +1350,46 @@ void ScopStmt::print(raw_ostream &OS) const {
void ScopStmt::dump() const { print(dbgs()); }
/// @brief Remove the accesses in @p InvMAs from this statement and record
///        them, together with their execution context, in @p TargetList.
///
/// For every access in @p InvMAs the complete access list registered for its
/// access instruction is dropped from this statement, i.e., the access itself
/// and all scalar accesses that were caused by it.
///
/// @param InvMAs     The invariant accesses to hoist out of this statement.
/// @param TargetList Receives one (access, context) pair per access in
///                   @p InvMAs; every pair holds its own copy of the context.
void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
                                   InvariantAccessesTy &TargetList) {

  // Remove all memory accesses in @p InvMAs from this statement together
  // with all scalar accesses that were caused by them. The tricky iteration
  // order used is needed because MemAccs is a vector and the order in which
  // the accesses of each memory access list (MAL) are stored in this vector
  // is reversed.
  for (MemoryAccess *MA : InvMAs) {
    auto &MAL = *lookupAccessesFor(MA->getAccessInstruction());
    MAL.reverse();

    auto MALIt = MAL.begin();
    auto MALEnd = MAL.end();
    auto MemAccsIt = MemAccs.begin();
    while (MALIt != MALEnd) {
      while (*MemAccsIt != *MALIt)
        MemAccsIt++;

      MALIt++;
      // Continue from the iterator returned by erase instead of reusing the
      // erased one, which the container no longer guarantees to be valid.
      MemAccsIt = MemAccs.erase(MemAccsIt);
    }

    // Drop the map entry first, then release the list it pointed to.
    InstructionToAccess.erase(MA->getAccessInstruction());
    delete &MAL;
  }

  // Get the context under which this statement, hence the memory accesses, are
  // executed.
  isl_set *DomainCtx = isl_set_params(getDomain());
  DomainCtx = isl_set_remove_redundancies(DomainCtx);
  DomainCtx = isl_set_detect_equalities(DomainCtx);
  DomainCtx = isl_set_coalesce(DomainCtx);

  // Each target entry owns a separate copy; the local reference is released
  // afterwards.
  for (MemoryAccess *MA : InvMAs)
    TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx)));

  isl_set_free(DomainCtx);
}
//===----------------------------------------------------------------------===//
/// Scop class implement
@ -2268,6 +2308,9 @@ void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) {
buildBoundaryContext();
simplifyContexts();
buildAliasChecks(AA);
hoistInvariantLoads();
simplifySCoP();
}
Scop::~Scop() {
@ -2290,6 +2333,9 @@ Scop::~Scop() {
isl_pw_multi_aff_free(MMA.second);
}
}
for (const auto &IA : InvariantAccesses)
isl_set_free(IA.second);
}
void Scop::updateAccessDimensionality() {
@ -2298,6 +2344,81 @@ void Scop::updateAccessDimensionality() {
Access->updateDimensionality();
}
void Scop::simplifySCoP() {
for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
ScopStmt &Stmt = *StmtIt;
if (!StmtIt->isEmpty()) {
StmtIt++;
continue;
}
if (Stmt.isRegionStmt())
for (BasicBlock *BB : Stmt.getRegion()->blocks())
StmtMap.erase(BB);
else
StmtMap.erase(Stmt.getBasicBlock());
StmtIt = Stmts.erase(StmtIt);
}
}
void Scop::hoistInvariantLoads() {
isl_union_map *Writes = getWrites();
for (ScopStmt &Stmt : *this) {
// TODO: Loads that are not loop carried, hence are in a statement with
// zero iterators, are by construction invariant, though we
// currently "hoist" them anyway.
isl_set *Domain = Stmt.getDomain();
MemoryAccessList InvMAs;
for (MemoryAccess *MA : Stmt) {
if (MA->isImplicit() || MA->isWrite() || !MA->isAffine())
continue;
isl_map *AccessRelation = MA->getAccessRelation();
if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0,
Stmt.getNumIterators())) {
isl_map_free(AccessRelation);
continue;
}
AccessRelation =
isl_map_intersect_domain(AccessRelation, isl_set_copy(Domain));
isl_set *AccessRange = isl_map_range(AccessRelation);
isl_union_map *Written = isl_union_map_intersect_range(
isl_union_map_copy(Writes), isl_union_set_from_set(AccessRange));
bool IsWritten = !isl_union_map_is_empty(Written);
isl_union_map_free(Written);
if (IsWritten)
continue;
InvMAs.push_front(MA);
}
// We inserted invariant accesses always in the front but need them to be
// sorted in a "natural order". The statements are already sorted in reverse
// post order and that suffices for the accesses too. The reason we require
// an order in the first place is the dependences between invariant loads
// that can be caused by indirect loads.
InvMAs.reverse();
// Transfer the memory access from the statement to the SCoP.
Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses);
isl_set_free(Domain);
}
isl_union_map_free(Writes);
if (!InvariantAccesses.empty())
IsOptimized = true;
}
const ScopArrayInfo *
Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType,
ArrayRef<const SCEV *> Sizes, bool IsPHI) {
@ -2478,6 +2599,12 @@ void Scop::print(raw_ostream &OS) const {
<< "\n";
OS.indent(4) << "Region: " << getNameStr() << "\n";
OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
OS.indent(4) << "Invariant Accesses: {\n";
for (const auto &IA : InvariantAccesses) {
IA.first->print(OS);
OS.indent(12) << "Execution Context: " << IA.second << "\n";
}
OS.indent(4) << "}\n";
printContext(OS.indent(4));
printArrayInfo(OS.indent(4));
printAliasAssumptions(OS);

View File

@ -108,6 +108,8 @@ Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old,
return const_cast<Value *>(Old);
if (Value *New = GlobalMap.lookup(Old)) {
if (Value *NewRemapped = GlobalMap.lookup(New))
New = NewRemapped;
if (Old->getType()->getScalarSizeInBits() <
New->getType()->getScalarSizeInBits())
New = Builder.CreateTruncOrBitCast(New, Old->getType());
@ -226,6 +228,9 @@ Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load,
ValueMapT &BBMap, LoopToScevMapT &LTS,
isl_id_to_ast_expr *NewAccesses) {
if (Value *PreloadLoad = GlobalMap.lookup(Load))
return PreloadLoad;
const Value *Pointer = Load->getPointerOperand();
Value *NewPointer =
generateLocationAccessed(Stmt, Load, Pointer, BBMap, LTS, NewAccesses);
@ -762,6 +767,12 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad(
void VectorBlockGenerator::generateLoad(
ScopStmt &Stmt, const LoadInst *Load, ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
Load->getName() + "_p");
return;
}
if (!VectorType::isValidElementType(Load->getType())) {
for (int i = 0; i < getVectorWidth(); i++)
ScalarMaps[i][Load] =

View File

@ -146,8 +146,9 @@ public:
auto SplitBlock = StartBlock->getSinglePredecessor();
Builder.SetInsertPoint(SplitBlock->getTerminator());
NodeBuilder.addParameters(S.getContext());
NodeBuilder.preloadInvariantLoads();
Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder());
SplitBlock->getTerminator()->setOperand(0, RTC);
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
Builder.SetInsertPoint(StartBlock->begin());
NodeBuilder.create(AstRoot);

View File

@ -814,6 +814,123 @@ void IslNodeBuilder::create(__isl_take isl_ast_node *Node) {
llvm_unreachable("Unknown isl_ast_node type");
}
/// @brief Generate the preload memory access for @p MA.
///
/// The range of the access relation of @p MA is turned into a piecewise
/// multi-affine expression, simplified with respect to the context of @p S,
/// converted into an AST access expression using @p Build, and handed to
/// @p ExprBuilder for code generation.
static inline Value *createPreloadLoad(Scop &S, const MemoryAccess &MA,
                                       isl_ast_build *Build,
                                       IslExprBuilder &ExprBuilder) {
  isl_pw_multi_aff *Location =
      isl_pw_multi_aff_from_set(isl_map_range(MA.getAccessRelation()));
  Location = isl_pw_multi_aff_gist_params(Location, S.getContext());

  return ExprBuilder.create(
      isl_ast_build_access_from_pw_multi_aff(Build, Location));
}
// Preload the invariant load described by MA and return the preloaded value.
//
// Domain is the parameter context under which MA is executed. If it equals
// the universe set the load is emitted unconditionally at the current insert
// point. Otherwise a condition is built from Domain, the load is placed in a
// block that is only executed when that condition holds, and the result is
// merged in a PHI node with a null constant for the path that skips the load.
Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
isl_set *Domain,
isl_ast_build *Build) {
// The load is always executed iff its context places no constraint on the
// parameters, i.e., it is equal to the universe of its space.
isl_set *Universe = isl_set_universe(isl_set_get_space(Domain));
bool AlwaysExecuted = isl_set_is_equal(Domain, Universe);
isl_set_free(Universe);
if (AlwaysExecuted) {
// Domain is not needed on this path, so it is released here.
isl_set_free(Domain);
return createPreloadLoad(S, MA, Build, ExprBuilder);
} else {
// Domain is handed over to isl_ast_build_expr_from_set; there is no explicit
// free on this path (presumably the isl call takes ownership — confirm
// against the isl API).
isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain);
Value *Cond = ExprBuilder.create(DomainCond);
// The branch below needs an i1 condition; wider values are compared
// against zero.
if (!Cond->getType()->isIntegerTy(1))
Cond = Builder.CreateIsNotNull(Cond);
// Split the current block twice: CondBB will end in the conditional branch,
// MergeBB is where both paths join again.
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
Builder.GetInsertPoint(), &DT, &LI);
CondBB->setName("polly.preload.cond");
BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), &DT, &LI);
MergeBB->setName("polly.preload.merge");
// Create the block that holds the conditionally executed load and register
// it in the dominator tree and, if CondBB is inside a loop, in the loop info.
Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();
BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
DT.addNewBlock(ExecBB, CondBB);
if (Loop *L = LI.getLoopFor(CondBB))
L->addBasicBlockToLoop(ExecBB, LI);
// Replace the terminator SplitBlock left in CondBB with the conditional
// branch: ExecBB if the condition holds, MergeBB otherwise.
auto *CondBBTerminator = CondBB->getTerminator();
Builder.SetInsertPoint(CondBBTerminator);
Builder.CreateCondBr(Cond, ExecBB, MergeBB);
CondBBTerminator->eraseFromParent();
// Give ExecBB its branch to MergeBB first, then emit the preload in front
// of that terminator.
Builder.SetInsertPoint(ExecBB);
Builder.CreateBr(MergeBB);
Builder.SetInsertPoint(ExecBB->getTerminator());
Instruction *AccInst = MA.getAccessInstruction();
Type *AccInstTy = AccInst->getType();
Value *PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder);
// Merge the loaded value (coming from ExecBB) with a null constant (coming
// directly from CondBB). The builder is left positioned in MergeBB.
// NOTE(review): the declaration's documentation speaks of merging with
// undef, whereas a null value is used here — confirm which is intended.
Builder.SetInsertPoint(MergeBB->getTerminator());
auto *MergePHI = Builder.CreatePHI(
AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge");
MergePHI->addIncoming(PreAccInst, ExecBB);
MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB);
return MergePHI;
}
}
void IslNodeBuilder::preloadInvariantLoads() {
const auto &InvAccList = S.getInvariantAccesses();
if (InvAccList.empty())
return;
const Region &R = S.getRegion();
BasicBlock *PreLoadBB =
SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
PreLoadBB->setName("polly.preload.begin");
Builder.SetInsertPoint(PreLoadBB->begin());
isl_ast_build *Build =
isl_ast_build_from_context(isl_set_universe(S.getParamSpace()));
for (const auto &IA : InvAccList) {
MemoryAccess *MA = IA.first;
assert(!MA->isImplicit());
isl_set *Domain = isl_set_copy(IA.second);
Instruction *AccInst = MA->getAccessInstruction();
Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build);
ValueMap[AccInst] = PreloadVal;
if (SE.isSCEVable(AccInst->getType())) {
isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
if (ParamId)
IDToValue[ParamId] = PreloadVal;
isl_id_free(ParamId);
}
SmallVector<Instruction *, 4> Users;
for (auto *U : AccInst->users())
if (Instruction *UI = dyn_cast<Instruction>(U))
if (!R.contains(UI))
Users.push_back(UI);
for (auto *U : Users)
U->replaceUsesOfWith(AccInst, PreloadVal);
auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
for (auto *DerivedSAI : SAI->getDerivedSAIs())
DerivedSAI->setBasePtr(PreloadVal);
}
isl_ast_build_free(Build);
}
void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
for (unsigned i = 0; i < isl_set_dim(Context, isl_dim_param); ++i) {

View File

@ -6,6 +6,7 @@
; }
;
; CHECK: sext i32 %c to i64
; CHECK: sext i32 %c to i64
; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64
; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15
; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64
@ -23,7 +24,7 @@
; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]]
; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]]
; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]]
; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %1, %[[NoAlias]]
; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %3, %[[NoAlias]]
; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -6,7 +6,7 @@
;
; void exprModDiv(float *A, float *B, float *C, long N, long p) {
; for (long i = 0; i < N; i++)
; C[i] += A[i] + B[i] + A[p] + B[p];
; C[i] += A[i] + B[i] + A[i] + B[i + p];
; }
;
;
@ -32,21 +32,21 @@
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 127 * floord(-p - 1, 127) + 127]
; CHECK: %20 = sub nsw i64 0, %p
; CHECK: %21 = sub nsw i64 %20, 1
; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127
; CHECK: %17 = sub nsw i64 0, %p
; CHECK: %18 = sub nsw i64 %17, 1
; CHECK: %pexp.fdiv_q.0 = sub i64 %18, 127
; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1
; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0
; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21
; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %18, 0
; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %18
; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127
; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %23 = add nsw i64 %p, %22
; CHECK: %24 = add nsw i64 %23, 127
; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %20 = add nsw i64 %p, %19
; CHECK: %21 = add nsw i64 %20, 127
; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21
; A[p / 127]
; CHECK: %pexp.div = sdiv exact i64 %p, 127
; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div
; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
@ -58,17 +58,17 @@
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
; POW2: %20 = sub nsw i64 0, %p
; POW2: %21 = sub nsw i64 %20, 1
; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7
; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %23 = add nsw i64 %p, %22
; POW2: %24 = add nsw i64 %23, 128
; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; POW2: %17 = sub nsw i64 0, %p
; POW2: %18 = sub nsw i64 %17, 1
; POW2: %polly.fdiv_q.shr = ashr i64 %18, 7
; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %20 = add nsw i64 %p, %19
; POW2: %21 = add nsw i64 %20, 128
; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21
; A[p / 128]
; POW2: %pexp.div = sdiv exact i64 %p, 128
; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@ -87,10 +87,11 @@ for.body: ; preds = %for.cond
%arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
%tmp1 = load float, float* %arrayidx1, align 4
%add = fadd float %tmp, %tmp1
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %p
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp2 = load float, float* %arrayidx2, align 4
%add3 = fadd float %add, %tmp2
%arrayidx4 = getelementptr inbounds float, float* %B, i64 %p
%padd = add nsw i64 %p, %i.0
%arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd
%tmp3 = load float, float* %arrayidx4, align 4
%add5 = fadd float %add3, %tmp3
%arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0

View File

@ -0,0 +1,39 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s
;
; CHECK-LABEL: polly.preload.begin:
; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0
; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B
;
; CHECK-LABEL: polly.stmt.bb2:
; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar
; CHECK-NEXT: store i32 %polly.access.B.load, i32* %scevgep, align 4
;
; void f(int *restrict A, int *restrict B) {
; for (int i = 0; i < 1024; i++)
; A[i] = *B;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @f(i32* noalias %A, i32* noalias %B) {
bb:
br label %bb1
bb1: ; preds = %bb4, %bb
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5
bb2: ; preds = %bb1
%tmp = load i32, i32* %B, align 4
%tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp, i32* %tmp3, align 4
br label %bb4
bb4: ; preds = %bb2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb5: ; preds = %bb1
ret void
}

View File

@ -4,6 +4,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
%struct.wombat = type {[4 x i32]}
; CHECK: polly.preload.begin:
; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0
; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B
; CHECK-NOT: %polly.access.B.load = load i32, i32* %polly.access.B
; CHECK: polly.stmt.bb3.entry: ; preds = %polly.start
; CHECK: br label %polly.stmt.bb3
@ -14,8 +19,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: br label %polly.stmt.bb13.exit
; CHECK: polly.stmt.bb5: ; preds = %polly.stmt.bb3
; CHECK: %tmp7_p_scalar_ = load i32, i32* %B, !alias.scope !0, !noalias !2
; CHECK: store i32 %tmp7_p_scalar_, i32* %polly.access.cast.arg1, !alias.scope !3, !noalias !4
; CHECK: store i32 %polly.access.B.load, i32* %polly.access.cast.arg2
; CHECK: br label %polly.stmt.bb13.exit
; Function Attrs: nounwind uwtable

View File

@ -1,62 +0,0 @@
; RUN: opt %loadPolly -disable-basicaa -polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s
;
; This caused an lnt crash at some point, just verify it will run through and
; produce the PHI node in the exit we are looking for.
;
; CHECK-LABEL: polly.merge_new_and_old:
; CHECK-NEXT: %.merge = phi %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* [ %.final_reload, %polly.stmt.for.end.298 ], [ %13, %for.end.298 ]
;
%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823*, %struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
%struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823 = type { i32, i32, [100 x %struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*], i32, float, float, float }
%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822 = type { i32, i32, i32, i32, i32, i32, %struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818*, %struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820*, %struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
%struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818 = type { %struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816*, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 }
%struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816 = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
%struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
%struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820 = type { [3 x [11 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [9 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [10 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [6 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819] }
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819 = type { i16, i8, i64 }
%struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821 = type { [2 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]] }
%struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824 = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825 = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825* }
@img = external global %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, align 8
; Function Attrs: nounwind uwtable
; Reduced test function: copies elements of the stack array %PredPel into an
; array reached through the pointer stored in the global @img.
; The pointer is loaded from @img twice (%0 and %2) and each loaded value is
; used as the base address of the stores in the following loop.
; NOTE(review): presumably these two loads of @img are the invariant loads the
; test wants hoisted; the RUN and FileCheck lines are not visible here, so
; confirm against the top of the file.
define void @intrapred_luma() #0 {
entry:
; 13-element i16 scratch array on the stack; it is only read below.
%PredPel = alloca [13 x i16], align 16
br label %for.body
for.body: ; preds = %for.body, %entry
; Loop with an undef back-edge condition and no other instructions.
br i1 undef, label %for.body, label %for.body.262
for.body.262: ; preds = %for.body
; First load of the pointer stored in @img.
%0 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8
br label %for.body.280
for.body.280: ; preds = %for.body.280, %for.body.262
%indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ]
; Read PredPel[1 + iv].
%arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1
%arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66
%1 = load i16, i16* %arrayidx283, align 2
; Write it to field 47 of the struct pointed to by %0, at index [0][2][iv].
%arrayidx289 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66
store i16 %1, i16* %arrayidx289, align 2
%indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1
; The back-edge condition is the constant false, so the body runs once.
br i1 false, label %for.body.280, label %for.end.298
for.end.298: ; preds = %for.body.280
; Second load of the pointer stored in @img, after the stores above.
%2 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8
br label %for.body.310
for.body.310: ; preds = %for.body.310, %for.end.298
%indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ]
; Read PredPel[9 + iv].
%arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 9
%arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv
%3 = load i16, i16* %arrayidx313, align 2
; Write it to field 47 of the struct pointed to by %2, at index [1][iv][1].
%arrayidx322 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1
store i16 %3, i16* %arrayidx322, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; The back-edge condition is the constant false, so the body runs once.
br i1 false, label %for.body.310, label %for.end.328
for.end.328: ; preds = %for.body.310
ret void
}

View File

@ -24,16 +24,10 @@ return:
ret void
}
; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) [[NUW]]
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0
; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1
; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2

View File

@ -24,19 +24,13 @@ return:
ret void
}
; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) [[NUW]]
; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0
; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1
; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2
; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3
; CHECK: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0
; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1
; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2
; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3
; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align
; CHECK: attributes [[NUW]] = { nounwind }

View File

@ -28,8 +28,10 @@ bb4: ; preds = %bb1
ret void
}
; CHECK: %tmp_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8, !alias.scope !0, !noalias !2
; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double>
; CHECK: store <4 x double> %0, <4 x double>* bitcast ([1024 x double]* @B to <4 x double>*), align 8, !alias.scope !3, !noalias !4
; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)
; CHECK: polly.stmt.bb2: ; preds = %polly.start
; CHECK: %tmp_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0
; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
; CHECK: %0 = fpext <4 x float> %tmp_p.splat to <4 x double>
; CHECK: store <4 x double> %0, <4 x double>*

View File

@ -52,5 +52,8 @@ define i32 @main() nounwind {
}
; CHECK: load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*)
; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer
; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)
; CHECK: polly.stmt.: ; preds = %polly.start
; CHECK: %_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0
; CHECK: %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer

View File

@ -22,6 +22,9 @@ body:
return:
ret void
}
; CHECK: %value_p_splat_one = load <1 x float**>, <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8
; CHECK: %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0)
; CHECK-NOT: load <1 x float**>
; CHECK: %value_p.splatinsert = insertelement <4 x float**> undef, float** %.load, i32 0
; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> undef, <4 x i32> zeroinitializer
; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8

View File

@ -21,6 +21,7 @@ entry:
for.0:
%Scalar0.val = load i32, i32* %Scalar0
store i32 1, i32* %Scalar0
br i1 false, label %for.0, label %for.1.preheader
for.1.preheader:

View File

@ -14,6 +14,10 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Function Attrs: nounwind
; CHECK: Invariant
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_init_ptr[0]
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
entry:
br label %for.i
@ -25,11 +29,7 @@ for.i: ; preds = %for.i.end, %entry
entry.next: ; preds = %for.i
%init = load i64, i64* %init_ptr
; CHECK-LABEL: Stmt_entry_next
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
; CHECK-NOT: Stmt_entry_next
br label %for.j
for.j: ; preds = %for.j, %entry.next

View File

@ -14,7 +14,12 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Function Attrs: nounwind
; CHECK: Invariant Accesses: {
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: MemRef_init_ptr[0]
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK: MemRef_init_ptr[0]
; CHECK: }
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
entry:
br label %for.i
@ -26,23 +31,17 @@ for.i: ; preds = %for.i.end, %entry
entry.next: ; preds = %for.i
%init = load i64, i64* %init_ptr
; CHECK-LABEL: Stmt_entry_next
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
; CHECK-NOT: Stmt_entry_next
br label %for.j
for.j: ; preds = %for.j, %entry.next
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
%init_2 = load i64, i64* %init_ptr
%init_sum = add i64 %init, %init_2
; CHECK-LABEL: Stmt_for_j
; CHECK: Stmt_for_j
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] };
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] };
%scevgep = getelementptr i64, i64* %A, i64 %indvar.j
store i64 %init_sum, i64* %scevgep

View File

@ -14,6 +14,9 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; Function Attrs: nounwind
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] };
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
entry:
br label %for.i
@ -32,11 +35,12 @@ for.j: ; preds = %for.j, %entry.next
%init_plus_two = add i64 %init, 2
%scevgep = getelementptr i64, i64* %A, i64 %indvar.j
store i64 %init_plus_two, i64* %scevgep
; CHECK-LABEL: Stmt_for_j
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] };
; CHECK: Statements {
; CHECK-NEXT: Stmt_for_j
; CHECK-NOT: ReadAccess
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] };
; CHECK-NEXT: }
%indvar.j.next = add nsw i64 %indvar.j, 1
%exitcond.j = icmp eq i64 %indvar.j.next, %N
br i1 %exitcond.j, label %for.i.end, label %for.j

View File

@ -0,0 +1,35 @@
; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] };
;
; void f(int *restrict A, int *restrict B) {
; for (int i = 0; i < 1024; i++)
; A[i] = *B;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; IR for the C loop shown in the comment above: for i in [0, 1024) A[i] = *B.
; Both pointer arguments are noalias. The load from %B in bb2 has no dependence
; on the induction variable; the FileCheck lines at the top of this file expect
; it to be listed under "Invariant Accesses" as Stmt_bb2 -> MemRef_B[0].
; Block and value names are matched by those lines and must not be renamed.
define void @f(i32* noalias %A, i32* noalias %B) {
bb:
br label %bb1
bb1: ; preds = %bb4, %bb
; Loop header: i starts at 0 and the loop exits to bb5 once i == 1024.
%indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ]
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %bb2, label %bb5
bb2: ; preds = %bb1
; Loop body: the address %B does not change between iterations.
%tmp = load i32, i32* %B, align 4
; A[i] = loaded value.
%tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 %tmp, i32* %tmp3, align 4
br label %bb4
bb4: ; preds = %bb2
; Latch: i = i + 1.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb5: ; preds = %bb1
ret void
}

View File

@ -0,0 +1,52 @@
; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-detect-unprofitable -analyze < %s | FileCheck %s
;
; CHECK: Invariant Accesses:
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BP[0] };
; CHECK-NEXT: Execution Context: [N] -> { : N >= 514 }
;
; void f(int *BP, int *A, int N) {
; for (int i = 0; i < N; i++)
; if (i > 512)
; A[i] = *BP;
; else
; A[i] = 0;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; IR for the C loop shown in the comment above: for i in [0, N), store *BP to
; A[i] when i > 512 and store 0 otherwise.
; The load from %BP sits in bb5, which is reached only when i > 512, so it is
; conditionally executed. The FileCheck lines at the top of this file expect it
; under "Invariant Accesses" as Stmt_bb5 -> MemRef_BP[0] with the execution
; context N >= 514 (the smallest N for which some i in [0, N) has i > 512).
; Block and value names are matched by those lines and must not be renamed.
define void @f(i32* %BP, i32* %A, i32 %N) {
bb:
; Sign-extend N once so it can be compared with the 64-bit induction variable.
%tmp = sext i32 %N to i64
br label %bb1
bb1: ; preds = %bb11, %bb
; Loop header: i starts at 0 and the loop continues while i < N (signed).
%indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ]
%tmp2 = icmp slt i64 %indvars.iv, %tmp
br i1 %tmp2, label %bb3, label %bb12
bb3: ; preds = %bb1
; Branch on i > 512 (signed).
%tmp4 = icmp sgt i64 %indvars.iv, 512
br i1 %tmp4, label %bb5, label %bb8
bb5: ; preds = %bb3
; Then-branch: A[i] = *BP. The address %BP does not change between iterations.
%tmp9a = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%inv = load i32, i32 *%BP
store i32 %inv, i32* %tmp9a, align 4
br label %bb10
bb8: ; preds = %bb3
; Else-branch: A[i] = 0.
%tmp9b = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
store i32 0, i32* %tmp9b, align 4
br label %bb10
bb10: ; preds = %bb8, %bb5
br label %bb11
bb11: ; preds = %bb10
; Latch: i = i + 1.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %bb1
bb12: ; preds = %bb1
ret void
}

View File

@ -14,6 +14,10 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; CHECK-LABEL: Function: f
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_init_ptr[0]
; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 }
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
entry:
br label %for.i
@ -24,12 +28,8 @@ for.i:
br label %entry.next
entry.next:
; CHECK: Stmt_entry_next
; CHECK-NOT: Stmt_entry_next
%init = load i64, i64* %init_ptr
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
br label %for.j
for.j:
@ -55,6 +55,9 @@ return:
}
; CHECK-LABEL: Function: g
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: MemRef_init_ptr[0]
; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 }
define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
entry:
br label %for.i
@ -65,12 +68,8 @@ for.i:
br label %entry.next
entry.next:
; CHECK: Stmt_entry_next
; CHECK-NOT: Stmt_entry_next
%init = load i64, i64* %init_ptr
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] };
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] };
br label %for.j
for.j: