diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 7edd1cf73dcc..871d35e99b74 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -659,11 +659,6 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
 int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                  const ValueToValueMap &StridesMap);
 
-/// \brief Returns true if the memory operations \p A and \p B are consecutive.
-/// This is a simple API that does not depend on the analysis pass.
-bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
-                         ScalarEvolution &SE, bool CheckType = true);
-
 /// \brief This analysis provides dependence information for the memory accesses
 /// of a loop.
 ///
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index a2ab231a62d5..84f9fa694636 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -901,78 +901,6 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
   return Stride;
 }
 
-/// Take the pointer operand from the Load/Store instruction.
-/// Returns NULL if this is not a valid Load/Store instruction.
-static Value *getPointerOperand(Value *I) {
-  if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return LI->getPointerOperand();
-  if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->getPointerOperand();
-  return nullptr;
-}
-
-/// Take the address space operand from the Load/Store instruction.
-/// Returns -1 if this is not a valid Load/Store instruction.
-static unsigned getAddressSpaceOperand(Value *I) {
-  if (LoadInst *L = dyn_cast<LoadInst>(I))
-    return L->getPointerAddressSpace();
-  if (StoreInst *S = dyn_cast<StoreInst>(I))
-    return S->getPointerAddressSpace();
-  return -1;
-}
-
-/// Returns true if the memory operations \p A and \p B are consecutive.
-bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
-                               ScalarEvolution &SE, bool CheckType) {
-  Value *PtrA = getPointerOperand(A);
-  Value *PtrB = getPointerOperand(B);
-  unsigned ASA = getAddressSpaceOperand(A);
-  unsigned ASB = getAddressSpaceOperand(B);
-
-  // Check that the address spaces match and that the pointers are valid.
-  if (!PtrA || !PtrB || (ASA != ASB))
-    return false;
-
-  // Make sure that A and B are different pointers.
-  if (PtrA == PtrB)
-    return false;
-
-  // Make sure that A and B have the same type if required.
-  if (CheckType && PtrA->getType() != PtrB->getType())
-    return false;
-
-  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
-
-  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
-  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
-  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
-
-  // OffsetDelta = OffsetB - OffsetA;
-  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
-  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
-  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
-  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
-  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
-  // Check if they are based on the same pointer. That makes the offsets
-  // sufficient.
-  if (PtrA == PtrB)
-    return OffsetDelta == Size;
-
-  // Compute the necessary base pointer delta to have the necessary final delta
-  // equal to the size.
-  // BaseDelta = Size - OffsetDelta;
-  const SCEV *SizeSCEV = SE.getConstant(Size);
-  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
-
-  // Otherwise compute the distance with SCEV between the base pointers.
-  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
-  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
-  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
-  return X == PtrSCEVB;
-}
-
 bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
   switch (Type) {
   case NoDep:
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index bb113ccf16ee..4521640e3947 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -26,20 +26,22 @@
 // i64 and larger types when i64 is legal and the value has few bits set. It
 // would be good to enhance isel to emit a loop for ctpop in this case.
 //
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+//   void foo(_Complex float *P)
+//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
+//
 // This could recognize common matrix multiplies and dot product idioms and
 // replace them with calls to BLAS (if linked in??).
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -106,9 +108,7 @@ public:
 
 private:
   typedef SmallVector<StoreInst *, 8> StoreList;
-  typedef MapVector<Value *, StoreList> StoreListMap;
-  StoreListMap StoreRefsForMemset;
-  StoreListMap StoreRefsForMemsetPattern;
+  StoreList StoreRefsForMemset;
   StoreList StoreRefsForMemcpy;
   bool HasMemset;
   bool HasMemsetPattern;
@@ -122,18 +122,14 @@ private:
                       SmallVectorImpl<BasicBlock *> &ExitBlocks);
 
   void collectStores(BasicBlock *BB);
-  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
-                    bool &ForMemcpy);
-  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
-                         bool ForMemset);
+  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
+  bool processLoopStore(StoreInst *SI, const SCEV *BECount);
   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
 
   bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                unsigned StoreAlignment, Value *StoredVal,
-                               Instruction *TheStore,
-                               SmallPtrSetImpl<Instruction *> &Stores,
-                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
-                               bool NegStride);
+                               Instruction *TheStore, const SCEVAddRecExpr *Ev,
+                               const SCEV *BECount, bool NegStride);
   bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
 
   /// @}
@@ -309,7 +305,7 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
 }
 
 bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
-                                      bool &ForMemsetPattern, bool &ForMemcpy) {
+                                      bool &ForMemcpy) {
   // Don't touch volatile stores.
   if (!SI->isSimple())
     return false;
@@ -357,7 +353,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
       StorePtr->getType()->getPointerAddressSpace() == 0 &&
       (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
     // It looks like we can use PatternValue!
-    ForMemsetPattern = true;
+    ForMemset = true;
     return true;
   }
 
@@ -397,7 +393,6 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
 
 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
   StoreRefsForMemset.clear();
-  StoreRefsForMemsetPattern.clear();
   StoreRefsForMemcpy.clear();
   for (Instruction &I : *BB) {
     StoreInst *SI = dyn_cast<StoreInst>(&I);
@@ -405,22 +400,15 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
       continue;
 
     bool ForMemset = false;
-    bool ForMemsetPattern = false;
     bool ForMemcpy = false;
     // Make sure this is a strided store with a constant stride.
-    if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
+    if (!isLegalStore(SI, ForMemset, ForMemcpy))
      continue;
 
     // Save the store locations.
-    if (ForMemset) {
-      // Find the base pointer.
-      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
-      StoreRefsForMemset[Ptr].push_back(SI);
-    } else if (ForMemsetPattern) {
-      // Find the base pointer.
-      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
-      StoreRefsForMemsetPattern[Ptr].push_back(SI);
-    } else if (ForMemcpy)
+    if (ForMemset)
+      StoreRefsForMemset.push_back(SI);
+    else if (ForMemcpy)
       StoreRefsForMemcpy.push_back(SI);
   }
 }
@@ -442,14 +430,9 @@ bool LoopIdiomRecognize::runOnLoopBlock(
   // Look for store instructions, which may be optimized to memset/memcpy.
   collectStores(BB);
 
-  // Look for a single store or sets of stores with a common base, which can be
-  // optimized into a memset (memset_pattern). The latter most commonly happens
-  // with structs and handunrolled loops.
-  for (auto &SL : StoreRefsForMemset)
-    MadeChange |= processLoopStores(SL.second, BECount, true);
-
-  for (auto &SL : StoreRefsForMemsetPattern)
-    MadeChange |= processLoopStores(SL.second, BECount, false);
+  // Look for a single store which can be optimized into a memset.
+  for (auto &SI : StoreRefsForMemset)
+    MadeChange |= processLoopStore(SI, BECount);
 
   // Optimize the store into a memcpy, if it feeds a similarly strided load.
   for (auto &SI : StoreRefsForMemcpy)
@@ -475,155 +458,26 @@ bool LoopIdiomRecognize::runOnLoopBlock(
   return MadeChange;
 }
 
-/// processLoopStores - See if this store(s) can be promoted to a memset.
-bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
-                                           const SCEV *BECount,
-                                           bool ForMemset) {
-  // Try to find consecutive stores that can be transformed into memsets.
-  SetVector<StoreInst *> Heads, Tails;
-  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
+/// processLoopStore - See if this store can be promoted to a memset.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+  assert(SI->isSimple() && "Expected only non-volatile stores.");
 
-  // Do a quadratic search on all of the given stores and find
-  // all of the pairs of stores that follow each other.
-  SmallVector<unsigned, 16> IndexQueue;
-  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
-    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
+  Value *StoredVal = SI->getValueOperand();
+  Value *StorePtr = SI->getPointerOperand();
 
-    Value *FirstStoredVal = SL[i]->getValueOperand();
-    Value *FirstStorePtr = SL[i]->getPointerOperand();
-    const SCEVAddRecExpr *FirstStoreEv =
-        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
-    unsigned FirstStride = getStoreStride(FirstStoreEv);
-    unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
+  // Check to see if the stride matches the size of the store.  If so, then we
+  // know that every byte is touched in the loop.
+  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+  unsigned Stride = getStoreStride(StoreEv);
+  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+  if (StoreSize != Stride && StoreSize != -Stride)
+    return false;
 
-    // See if we can optimize just this store in isolation.
-    if (FirstStride == FirstStoreSize || FirstStride == -FirstStoreSize) {
-      Heads.insert(SL[i]);
-      continue;
-    }
+  bool NegStride = StoreSize == -Stride;
 
-    Value *FirstSplatValue = nullptr;
-    Constant *FirstPatternValue = nullptr;
-
-    if (ForMemset)
-      FirstSplatValue = isBytewiseValue(FirstStoredVal);
-    else
-      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
-
-    assert((FirstSplatValue || FirstPatternValue) &&
-           "Expected either splat value or pattern value.");
-
-    IndexQueue.clear();
-    // If a store has multiple consecutive store candidates, search Stores
-    // array according to the sequence: from i+1 to e, then from i-1 to 0.
-    // This is because usually pairing with immediate succeeding or preceding
-    // candidate create the best chance to find memset opportunity.
-    unsigned j = 0;
-    for (j = i + 1; j < e; ++j)
-      IndexQueue.push_back(j);
-    for (j = i; j > 0; --j)
-      IndexQueue.push_back(j - 1);
-
-    for (auto &k : IndexQueue) {
-      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
-      Value *SecondStorePtr = SL[k]->getPointerOperand();
-      const SCEVAddRecExpr *SecondStoreEv =
-          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
-      unsigned SecondStride = getStoreStride(SecondStoreEv);
-
-      if (FirstStride != SecondStride)
-        continue;
-
-      Value *SecondStoredVal = SL[k]->getValueOperand();
-      Value *SecondSplatValue = nullptr;
-      Constant *SecondPatternValue = nullptr;
-
-      if (ForMemset)
-        SecondSplatValue = isBytewiseValue(SecondStoredVal);
-      else
-        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
-
-      assert((SecondSplatValue || SecondPatternValue) &&
-             "Expected either splat value or pattern value.");
-
-      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
-        if (ForMemset) {
-          ConstantInt *C1 = dyn_cast<ConstantInt>(FirstSplatValue);
-          ConstantInt *C2 = dyn_cast<ConstantInt>(SecondSplatValue);
-          if (!C1 || !C2 || C1 != C2)
-            continue;
-        } else {
-          Constant *C1 = FirstPatternValue;
-          Constant *C2 = SecondPatternValue;
-
-          if (ConstantArray *CA1 = dyn_cast<ConstantArray>(C1))
-            C1 = CA1->getSplatValue();
-
-          if (ConstantArray *CA2 = dyn_cast<ConstantArray>(C2))
-            C2 = CA2->getSplatValue();
-
-          if (C1 != C2)
-            continue;
-        }
-        Tails.insert(SL[k]);
-        Heads.insert(SL[i]);
-        ConsecutiveChain[SL[i]] = SL[k];
-        break;
-      }
-    }
-  }
-
-  // We may run into multiple chains that merge into a single chain. We mark the
-  // stores that we transformed so that we don't visit the same store twice.
-  SmallPtrSet<Instruction *, 16> TransformedStores;
-  bool Changed = false;
-
-  // For stores that start but don't end a link in the chain:
-  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
-       it != e; ++it) {
-    if (Tails.count(*it))
-      continue;
-
-    // We found a store instr that starts a chain. Now follow the chain and try
-    // to transform it.
-    SmallPtrSet<Instruction *, 8> AdjacentStores;
-    StoreInst *I = *it;
-
-    StoreInst *HeadStore = I;
-    unsigned StoreSize = 0;
-
-    // Collect the chain into a list.
-    while (Tails.count(I) || Heads.count(I)) {
-      if (TransformedStores.count(I))
-        break;
-      AdjacentStores.insert(I);
-
-      StoreSize += getStoreSizeInBytes(I, DL);
-      // Move to the next value in the chain.
-      I = ConsecutiveChain[I];
-    }
-
-    Value *StoredVal = HeadStore->getValueOperand();
-    Value *StorePtr = HeadStore->getPointerOperand();
-    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
-    unsigned Stride = getStoreStride(StoreEv);
-
-    // Check to see if the stride matches the size of the stores.  If so, then
-    // we know that every byte is touched in the loop.
-    if (StoreSize != Stride && StoreSize != -Stride)
-      continue;
-
-    bool NegStride = StoreSize == -Stride;
-
-    if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
-                                StoredVal, HeadStore, AdjacentStores, StoreEv,
-                                BECount, NegStride)) {
-      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
-      Changed = true;
-    }
-  }
-
-  return Changed;
+  // See if we can optimize just this store in isolation.
+  return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+                                 StoredVal, SI, StoreEv, BECount, NegStride);
 }
 
 /// processLoopMemSet - See if this memset can be promoted to a large memset.
@@ -666,21 +520,18 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
   if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
     return false;
 
-  SmallPtrSet<Instruction *, 1> MSIs;
-  MSIs.insert(MSI);
   return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
-                                 MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
+                                 MSI->getAlignment(), SplatValue, MSI, Ev,
                                  BECount, /*NegStride=*/false);
 }
 
 /// mayLoopAccessLocation - Return true if the specified loop might access the
 /// specified pointer location, which is a loop-strided access.  The 'Access'
 /// argument specifies what the verboten forms of access are (read or write).
-static bool
-mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
-                      const SCEV *BECount, unsigned StoreSize,
-                      AliasAnalysis &AA,
-                      SmallPtrSetImpl<Instruction *> &IgnoredStores) {
+static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+                                  const SCEV *BECount, unsigned StoreSize,
+                                  AliasAnalysis &AA,
+                                  Instruction *IgnoredStore) {
   // Get the location that may be stored across the loop.  Since the access is
   // strided positively through memory, we say that the modified location starts
   // at the pointer and has infinite size.
@@ -700,8 +551,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
        ++BI)
     for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
-      if (IgnoredStores.count(&*I) == 0 &&
-          (AA.getModRefInfo(&*I, StoreLoc) & Access))
+      if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
        return true;
 
   return false;
 }
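
Aside on the restored gate above, not part of the patch: processLoopStore now promotes a store only when the magnitude of the pointer's per-iteration stride equals the store size, so consecutive iterations write adjacent, non-overlapping bytes. A minimal standalone sketch of that check in plain C++ (the names are hypothetical; no LLVM APIs are used):

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical standalone model of the size/stride gate in processLoopStore:
// a strided store can become a memset only when |stride| == store size, i.e.
// the loop touches every byte of the range exactly once.
static bool canPromoteToMemset(int64_t StrideInBytes, uint64_t StoreSizeInBytes) {
  return StoreSizeInBytes == (uint64_t)StrideInBytes ||
         StoreSizeInBytes == (uint64_t)-StrideInBytes; // backward-running loop
}

int main() {
  printf("%d\n", canPromoteToMemset(4, 4));  // f[i] = 0 over i32: 1 (promoted)
  printf("%d\n", canPromoteToMemset(-4, 4)); // same loop counting down: 1
  printf("%d\n", canPromoteToMemset(8, 4));  // 4-byte store every 8 bytes: 0 (gap)
  return 0;
}
```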
@@ -724,8 +574,7 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
 /// transform this into a memset or memset_pattern in the loop preheader, do so.
 bool LoopIdiomRecognize::processLoopStridedStore(
     Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
-    Value *StoredVal, Instruction *TheStore,
-    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
+    Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
     const SCEV *BECount, bool NegStride) {
   Value *SplatValue = isBytewiseValue(StoredVal);
   Constant *PatternValue = nullptr;
@@ -760,7 +609,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
   Value *BasePtr =
       Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
   if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
-                            *AA, Stores)) {
+                            *AA, TheStore)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -813,8 +662,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
-  for (auto *I : Stores)
-    deleteDeadInstruction(I, TLI);
+  deleteDeadInstruction(TheStore, TLI);
   ++NumMemSet;
   return true;
 }
@@ -866,10 +714,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
   Value *StoreBasePtr = Expander.expandCodeFor(
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
 
-  SmallPtrSet<Instruction *, 2> Stores;
-  Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
-                            StoreSize, *AA, Stores)) {
+                            StoreSize, *AA, SI)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
@@ -889,7 +735,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
       LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
 
   if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
-                            *AA, Stores)) {
+                            *AA, SI)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8989b13cccc3..2520c78b5380 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -26,7 +26,6 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -402,6 +401,9 @@ public:
     }
   }
 
+  /// \returns true if the memory operations A and B are consecutive.
+  bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
+
   /// \brief Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
 
@@ -436,6 +438,14 @@ private:
   /// vectorized, or NULL. They may happen in cycles.
   Value *alreadyVectorized(ArrayRef<Value *> VL) const;
 
+  /// \brief Take the pointer operand from the Load/Store instruction.
+  /// \returns NULL if this is not a valid Load/Store instruction.
+  static Value *getPointerOperand(Value *I);
+
+  /// \brief Take the address space operand from the Load/Store instruction.
+  /// \returns -1 if this is not a valid Load/Store instruction.
+  static unsigned getAddressSpaceOperand(Value *I);
+
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars.
   int getGatherCost(Type *Ty);
@@ -1181,8 +1191,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
           return;
         }
 
-        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
-          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL, *SE)) {
+        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
             ++NumLoadsWantToChangeOrder;
           }
           BS.cancelScheduling(VL);
@@ -1354,7 +1364,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
       const DataLayout &DL = F->getParent()->getDataLayout();
       // Check if the stores are consecutive or if we need to swizzle them.
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
-        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
+        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
          BS.cancelScheduling(VL);
           newTreeEntry(VL, false);
           DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@@ -1827,6 +1837,63 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
   return getGatherCost(VecTy);
 }
 
+Value *BoUpSLP::getPointerOperand(Value *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->getPointerOperand();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->getPointerOperand();
+  return nullptr;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I))
+    return L->getPointerAddressSpace();
+  if (StoreInst *S = dyn_cast<StoreInst>(I))
+    return S->getPointerAddressSpace();
+  return -1;
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
+  Value *PtrA = getPointerOperand(A);
+  Value *PtrB = getPointerOperand(B);
+  unsigned ASA = getAddressSpaceOperand(A);
+  unsigned ASB = getAddressSpaceOperand(B);
+
+  // Check that the address spaces match and that the pointers are valid.
+  if (!PtrA || !PtrB || (ASA != ASB))
+    return false;
+
+  // Make sure that A and B are different pointers of the same type.
+  if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
+    return false;
+
+  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
+
+  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+
+  APInt OffsetDelta = OffsetB - OffsetA;
+
+  // Check if they are based on the same pointer. That makes the offsets
+  // sufficient.
+  if (PtrA == PtrB)
+    return OffsetDelta == Size;
+
+  // Compute the necessary base pointer delta to have the necessary final delta
+  // equal to the size.
+  APInt BaseDelta = Size - OffsetDelta;
+
+  // Otherwise compute the distance with SCEV between the base pointers.
+  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+  const SCEV *C = SE->getConstant(BaseDelta);
+  const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
+  return X == PtrSCEVB;
+}
+
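
The consecutiveness test above has a cheap fast path worth spelling out: after stripping constant in-bounds offsets, two accesses with the same base are consecutive exactly when OffsetB - OffsetA equals the element's store size; only distinct bases fall back to the SCEV distance comparison. A standalone C++ sketch of the same-base case (struct and function names here are hypothetical, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical model of the same-base fast path of isConsecutiveAccess.
struct StrippedPtr {
  const void *Base; // pointer after constant in-bounds GEP offsets are stripped
  int64_t Offset;   // accumulated constant offset in bytes
};

static bool isConsecutiveSameBase(StrippedPtr A, StrippedPtr B, uint64_t Size) {
  if (A.Base != B.Base)
    return false; // the real code compares SCEVs of the two bases instead
  return (uint64_t)(B.Offset - A.Offset) == Size;
}

int main() {
  int Buf[8];
  StrippedPtr A{Buf, 0}, B{Buf, 4}, C{Buf, 8};
  assert(isConsecutiveSameBase(A, B, sizeof(int)));  // &Buf[1] follows &Buf[0]
  assert(!isConsecutiveSameBase(A, C, sizeof(int))); // one element apart
  return 0;
}
```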
 // Reorder commutative operations in alternate shuffle if the resulting vectors
 // are consecutive loads. This would allow us to vectorize the tree.
 // If we have something like-
@@ -1854,10 +1921,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
       if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
         Instruction *VL1 = cast<Instruction>(VL[j]);
         Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
+        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
           std::swap(Left[j], Right[j]);
           continue;
-        } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
+        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -1868,10 +1935,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
       if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
         Instruction *VL1 = cast<Instruction>(VL[j]);
         Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
+        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
           std::swap(Left[j], Right[j]);
           continue;
-        } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
+        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -2021,7 +2088,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
   for (unsigned j = 0; j < VL.size() - 1; ++j) {
     if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
       if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-        if (isConsecutiveAccess(L, L1, DL, *SE)) {
+        if (isConsecutiveAccess(L, L1, DL)) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -2029,7 +2096,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
     }
     if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
       if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-        if (isConsecutiveAccess(L, L1, DL, *SE)) {
+        if (isConsecutiveAccess(L, L1, DL)) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -3394,7 +3461,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
      IndexQueue.push_back(j - 1);
 
     for (auto &k : IndexQueue) {
-      if (isConsecutiveAccess(Stores[i], Stores[k], DL, *SE)) {
+      if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
         Tails.insert(Stores[k]);
         Heads.insert(Stores[i]);
         ConsecutiveChain[Stores[i]] = Stores[k];
diff --git a/llvm/test/Transforms/LoopIdiom/struct.ll b/llvm/test/Transforms/LoopIdiom/struct.ll
deleted file mode 100644
index 2828024952e2..000000000000
--- a/llvm/test/Transforms/LoopIdiom/struct.ll
+++ /dev/null
@@ -1,221 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-target triple = "x86_64-apple-darwin10.0.0"
-
-%struct.foo = type { i32, i32 }
-%struct.foo1 = type { i32, i32, i32 }
-%struct.foo2 = type { i32, i16, i16 }
-
-;void bar1(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;  }
-;}
-define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar1(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void bar2(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].b = 0;
-;    f[i].a = 0;
-;  }
-;}
-define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar2(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void bar3(foo_t *f, unsigned n) {
-;  for (unsigned i = n; i > 0; --i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;  }
-;}
-define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %n to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %1 = trunc i64 %indvars.iv to i32
-  %dec = add i32 %1, -1
-  %cmp = icmp eq i32 %dec, 0
-  %indvars.iv.next = add nsw i64 %indvars.iv, -1
-  br i1 %cmp, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar3(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void bar4(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 1;
-;  }
-;}
-define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 1, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar4(
-; CHECK-NOT: call void @llvm.memset
-}
-
-;void bar5(foo1_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;  }
-;}
-define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
-  store i32 0, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar5(
-; CHECK-NOT: call void @llvm.memset
-}
-
-;void bar6(foo2_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 0;
-;    f[i].c = 0;
-;  }
-;}
-define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1
-  store i16 0, i16* %b, align 4
-  %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2
-  store i16 0, i16* %c, align 2
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar6(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
diff --git a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
deleted file mode 100644
index d7809b746b15..000000000000
--- a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
+++ /dev/null
@@ -1,186 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-
-target triple = "x86_64-apple-darwin10.0.0"
-
-%struct.foo = type { i32, i32 }
-%struct.foo1 = type { i32, i32, i32 }
-
-;void bar1(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 2;
-;    f[i].b = 2;
-;  }
-;}
-define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 2, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 2, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar1(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}
-
-;void bar2(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].b = 2;
-;    f[i].a = 2;
-;  }
-;}
-define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 2, i32* %b, align 4
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 2, i32* %a, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar2(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}
-
-;void bar3(foo_t *f, unsigned n) {
-;  for (unsigned i = n; i > 0; --i) {
-;    f[i].a = 2;
-;    f[i].b = 2;
-;  }
-;}
-define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %n to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 2, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 2, i32* %b, align 4
-  %1 = trunc i64 %indvars.iv to i32
-  %dec = add i32 %1, -1
-  %cmp = icmp eq i32 %dec, 0
-  %indvars.iv.next = add nsw i64 %indvars.iv, -1
-  br i1 %cmp, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar3(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}
-
-;void bar4(foo_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 0;
-;    f[i].b = 1;
-;  }
-;}
-define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
-  store i32 0, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
-  store i32 1, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar4(
-; CHECK-NOT: call void @memset_pattern16
-}
-
-;void bar5(foo1_t *f, unsigned n) {
-;  for (unsigned i = 0; i < n; ++i) {
-;    f[i].a = 1;
-;    f[i].b = 1;
-;  }
-;}
-define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
-entry:
-  %cmp1 = icmp eq i32 %n, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
-  store i32 1, i32* %a, align 4
-  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
-  store i32 1, i32* %b, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @bar5(
-; CHECK-NOT: call void @memset_pattern16
-}
diff --git a/llvm/test/Transforms/LoopIdiom/unroll.ll b/llvm/test/Transforms/LoopIdiom/unroll.ll
deleted file mode 100644
index 0cdfda254d78..000000000000
--- a/llvm/test/Transforms/LoopIdiom/unroll.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-; CHECK @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-
-target triple = "x86_64-apple-darwin10.0.0"
-
-;void test(int *f, unsigned n) {
-;  for (unsigned i = 0; i < 2 * n; i += 2) {
-;    f[i] = 0;
-;    f[i+1] = 0;
-;  }
-;}
-define void @test(i32* %f, i32 %n) nounwind ssp {
-entry:
-  %mul = shl i32 %n, 1
-  %cmp1 = icmp eq i32 %mul, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %mul to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
-  store i32 0, i32* %arrayidx, align 4
-  %1 = or i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
-  store i32 0, i32* %arrayidx2, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
-  %cmp = icmp ult i64 %indvars.iv.next, %0
-  br i1 %cmp, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @test(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: store
-}
-
-;void test_pattern(int *f, unsigned n) {
-;  for (unsigned i = 0; i < 2 * n; i += 2) {
-;    f[i] = 2;
-;    f[i+1] = 2;
-;  }
-;}
-define void @test_pattern(i32* %f, i32 %n) nounwind ssp {
-entry:
-  %mul = shl i32 %n, 1
-  %cmp1 = icmp eq i32 %mul, 0
-  br i1 %cmp1, label %for.end, label %for.body.preheader
-
-for.body.preheader:                               ; preds = %entry
-  %0 = zext i32 %mul to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
-  store i32 2, i32* %arrayidx, align 4
-  %1 = or i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
-  store i32 2, i32* %arrayidx2, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
-  %cmp = icmp ult i64 %indvars.iv.next, %0
-  br i1 %cmp, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  ret void
-; CHECK-LABEL: @test_pattern(
-; CHECK: call void @memset_pattern16
-; CHECK-NOT: store
-}
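
Net effect of the revert on loop-idiom recognition, shown as illustrative C++ (these functions are not from the patch; the second mirrors the deleted bar1 test):

```cpp
// Still recognized: one store per iteration whose stride equals its size,
// so the loop writes every byte of the range.
void zero_array(int *f, unsigned n) {
  for (unsigned i = 0; i < n; ++i)
    f[i] = 0; // becomes memset(f, 0, n * sizeof(int))
}

// No longer recognized: each store writes 4 bytes at an 8-byte stride; only
// the removed multi-store chaining could pair them into a single memset.
struct foo { int a, b; };
void zero_structs(foo *f, unsigned n) {
  for (unsigned i = 0; i < n; ++i) {
    f[i].a = 0;
    f[i].b = 0;
  }
}
```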