diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 1b423abc6e58..b040a2711b47 100644
--- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -74,10 +74,6 @@ namespace {
   private:
     TargetData *TD;
 
-    /// DeadInsts - Keep track of instructions we have made dead, so that
-    /// we can remove them after we are done working.
-    SmallVector<Value*, 32> DeadInsts;
-
     /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
     /// information about the uses.  All these fields are initialized to false
     /// and set to true when something is learned.
@@ -106,30 +102,25 @@ namespace {
 
     int isSafeAllocaToScalarRepl(AllocaInst *AI);
 
-    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
-                             uint64_t ArrayOffset, AllocaInfo &Info);
-    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
-                   uint64_t &ArrayOffset, AllocaInfo &Info);
-    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset,
-                         uint64_t MemSize, const Type *MemOpType, bool isStore,
-                         AllocaInfo &Info);
-    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
-    unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset);
+    void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
+                               AllocaInfo &Info);
+    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
+                          AllocaInfo &Info);
+    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
+                                        unsigned OpNo, AllocaInfo &Info);
+    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
+                                        AllocaInfo &Info);
 
     void DoScalarReplacement(AllocaInst *AI,
                              std::vector<AllocaInst*> &WorkList);
-    void DeleteDeadInstructions();
     void CleanupGEP(GetElementPtrInst *GEP);
-    void CleanupAllocaUsers(Value *V);
+    void CleanupAllocaUsers(AllocaInst *AI);
     AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
 
-    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
-                              SmallVector<AllocaInst*, 32> &NewElts);
-    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
-                        SmallVector<AllocaInst*, 32> &NewElts);
-    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
-                    SmallVector<AllocaInst*, 32> &NewElts);
-    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
+                                    SmallVector<AllocaInst*, 32> &NewElts);
+
+    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                       AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
     void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
@@ -369,37 +360,176 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
     }
   }
 
-  // Now that we have created the new alloca instructions, rewrite all the
-  // uses of the old alloca.
-  DeadInsts.push_back(AI);
-  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
+  // Now that we have created the alloca instructions that we want to use,
+  // expand the getelementptr instructions to use them.
+  while (!AI->use_empty()) {
+    Instruction *User = cast<Instruction>(AI->use_back());
+    if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
+      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
+      BCInst->eraseFromParent();
+      continue;
+    }
+
+    // Replace:
+    //   %res = load { i32, i32 }* %alloc
+    // with:
+    //   %load.0 = load i32* %alloc.0
+    //   %insert.0 = insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
+    //   %load.1 = load i32* %alloc.1
+    //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+    // (Also works for arrays instead of structs)
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      Value *Insert = UndefValue::get(LI->getType());
+      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+        Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
+        Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+      }
+      LI->replaceAllUsesWith(Insert);
+      LI->eraseFromParent();
+      continue;
+    }
 
-  // Now erase any instructions that were made dead while rewriting the alloca.
-  DeleteDeadInstructions();
+    // Replace:
+    //   store { i32, i32 } %val, { i32, i32 }* %alloc
+    // with:
+    //   %val.0 = extractvalue { i32, i32 } %val, 0
+    //   store i32 %val.0, i32* %alloc.0
+    //   %val.1 = extractvalue { i32, i32 } %val, 1
+    //   store i32 %val.1, i32* %alloc.1
+    // (Also works for arrays instead of structs)
+    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      Value *Val = SI->getOperand(0);
+      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+        Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
+        new StoreInst(Extract, ElementAllocas[i], SI);
+      }
+      SI->eraseFromParent();
+      continue;
+    }
+
+    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
+    // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
+    unsigned Idx =
+       (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
+    assert(Idx < ElementAllocas.size() && "Index out of range?");
+    AllocaInst *AllocaToUse = ElementAllocas[Idx];
+
+    Value *RepValue;
+    if (GEPI->getNumOperands() == 3) {
+      // Do not insert a new getelementptr instruction with zero indices, only
+      // to have it optimized out later.
+      RepValue = AllocaToUse;
+    } else {
+      // We are indexing deeply into the structure, so we still need a
+      // getelementptr instruction to finish the indexing.  This may be
+      // expanded itself once the worklist is rerun.
+      //
+      SmallVector<Value*, 8> NewArgs;
+      NewArgs.push_back(Constant::getNullValue(
+                                          Type::getInt32Ty(AI->getContext())));
+      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
+      RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
+                                           NewArgs.end(), "", GEPI);
+      RepValue->takeName(GEPI);
+    }
+
+    // If this GEP is to the start of the aggregate, check for memcpys.
+    if (Idx == 0 && GEPI->hasAllZeroIndices())
+      RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
+
+    // Move all of the users over to the new GEP.
+    GEPI->replaceAllUsesWith(RepValue);
+    // Delete the old GEP
+    GEPI->eraseFromParent();
+  }
+
+  // Finally, delete the Alloca instruction
+  AI->eraseFromParent();
   NumReplaced++;
 }
 
-/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
-/// recursively including all their operands that become trivially dead.
-void SROA::DeleteDeadInstructions() {
-  while (!DeadInsts.empty()) {
-    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
-    if (I == 0)
-      continue;
+/// isSafeElementUse - Check to see if this use is an allowed use for a
+/// getelementptr instruction of an array aggregate allocation.  isFirstElt
+/// indicates whether Ptr is known to point to the start of the aggregate.
+void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
+                            AllocaInfo &Info) {
+  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
+       I != E; ++I) {
+    Instruction *User = cast<Instruction>(*I);
+    switch (User->getOpcode()) {
+    case Instruction::Load:  break;
+    case Instruction::Store:
+      // Store is ok if storing INTO the pointer, not storing the pointer
+      if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
+      break;
+    case Instruction::GetElementPtr: {
+      GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
+      bool AreAllZeroIndices = isFirstElt;
+      if (GEP->getNumOperands() > 1 &&
+          (!isa<ConstantInt>(GEP->getOperand(1)) ||
+           !cast<ConstantInt>(GEP->getOperand(1))->isZero()))
+        // Using pointer arithmetic to navigate the array.
+        return MarkUnsafe(Info);
+
+      // Verify that any array subscripts are in range.
+      for (gep_type_iterator GEPIt = gep_type_begin(GEP),
+           E = gep_type_end(GEP); GEPIt != E; ++GEPIt) {
+        // Ignore struct elements, no extra checking needed for these.
+        if (isa<StructType>(*GEPIt))
+          continue;
 
-    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
-      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
-        // Zero out the operand and see if it becomes trivially dead.
-        *OI = 0;
-        if (isInstructionTriviallyDead(U))
-          DeadInsts.push_back(U);
+        // This GEP indexes an array.  Verify that this is an in-range
+        // constant integer. Specifically, consider A[0][i]. We cannot know that
+        // the user isn't doing invalid things like allowing i to index an
+        // out-of-range subscript that accesses A[1].  Because of this, we have
+        // to reject SROA of any accesses into structs where any of the
+        // components are variables.
+        ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
+        if (!IdxVal) return MarkUnsafe(Info);
+
+        // Are all indices still zero?
+        AreAllZeroIndices &= IdxVal->isZero();
+
+        if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
+          if (IdxVal->getZExtValue() >= AT->getNumElements())
+            return MarkUnsafe(Info);
+        } else if (const VectorType *VT = dyn_cast<VectorType>(*GEPIt)) {
+          if (IdxVal->getZExtValue() >= VT->getNumElements())
+            return MarkUnsafe(Info);
+        }
       }
-
-    I->eraseFromParent();
+
+      isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
+      if (Info.isUnsafe) return;
+      break;
+    }
+    case Instruction::BitCast:
+      if (isFirstElt) {
+        isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
+        if (Info.isUnsafe) return;
+        break;
+      }
+      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
+      return MarkUnsafe(Info);
+    case Instruction::Call:
+      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+        if (isFirstElt) {
+          isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
+          if (Info.isUnsafe) return;
+          break;
+        }
+      }
+      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
+      return MarkUnsafe(Info);
+    default:
+      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
+      return MarkUnsafe(Info);
+    }
   }
+  return;  // All users look ok :)
 }
-
+
 /// AllUsersAreLoads - Return true if all users of this value are loads.
 static bool AllUsersAreLoads(Value *Ptr) {
   for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
@@ -409,116 +539,72 @@ static bool AllUsersAreLoads(Value *Ptr) {
   return true;
 }
 
-/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
-/// performing scalar replacement of alloca AI.  The results are flagged in
-/// the Info parameter.  Offset and ArrayOffset indicate the position within
-/// AI that is referenced by this instruction.
-void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
-                               uint64_t ArrayOffset, AllocaInfo &Info) {
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
-    Instruction *User = cast<Instruction>(*UI);
+/// isSafeUseOfAllocation - Check if this user is an allowed use for an
+/// aggregate allocation.
+void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
+                                 AllocaInfo &Info) {
+  if (BitCastInst *C = dyn_cast<BitCastInst>(User))
+    return isSafeUseOfBitCastedAllocation(C, AI, Info);
 
-    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
-      isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info);
-    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
-      uint64_t GEPArrayOffset = ArrayOffset;
-      uint64_t GEPOffset = Offset;
-      isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info);
-      if (!Info.isUnsafe)
-        isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info);
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
-      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
-      if (Length)
-        isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0,
-                        UI.getOperandNo() == 1, Info);
-      else
-        MarkUnsafe(Info);
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      if (!LI->isVolatile()) {
-        const Type *LIType = LI->getType();
-        isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType),
-                        LIType, false, Info);
-      } else
-        MarkUnsafe(Info);
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-      // Store is ok if storing INTO the pointer, not storing the pointer
-      if (!SI->isVolatile() && SI->getOperand(0) != I) {
-        const Type *SIType = SI->getOperand(0)->getType();
-        isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType),
-                        SIType, true, Info);
-      } else
-        MarkUnsafe(Info);
-    } else if (isa<DbgInfoIntrinsic>(UI)) {
-      // If one user is DbgInfoIntrinsic then check if all users are
-      // DbgInfoIntrinsics.
-      if (OnlyUsedByDbgInfoIntrinsics(I)) {
-        Info.needsCleanup = true;
-        return;
-      }
-      MarkUnsafe(Info);
-    } else {
-      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
-      MarkUnsafe(Info);
-    }
-    if (Info.isUnsafe) return;
-  }
-}
+  if (LoadInst *LI = dyn_cast<LoadInst>(User))
+    if (!LI->isVolatile())
+      return; // Loads (returning a first class aggregate) are always rewritable
 
-/// isSafeGEP - Check if a GEP instruction can be handled for scalar
-/// replacement.  It is safe when all the indices are constant, in-bounds
-/// references, and when the resulting offset corresponds to an element within
-/// the alloca type.  The results are flagged in the Info parameter.  Upon
-/// return, Offset is adjusted as specified by the GEP indices.  For the
-/// special case of a variable index to a 2-element array, ArrayOffset is set
-/// to the array element size.
-void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
-                     uint64_t &Offset, uint64_t &ArrayOffset,
-                     AllocaInfo &Info) {
-  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
-  if (GEPIt == E)
-    return;
-
-  // The first GEP index must be zero.
-  if (!isa<ConstantInt>(GEPIt.getOperand()) ||
-      !cast<ConstantInt>(GEPIt.getOperand())->isZero())
+  if (StoreInst *SI = dyn_cast<StoreInst>(User))
+    if (!SI->isVolatile() && SI->getOperand(0) != AI)
+      return; // Store is ok if storing INTO the pointer, not storing the pointer
+
+  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
+  if (GEPI == 0)
     return MarkUnsafe(Info);
 
-  if (++GEPIt == E)
-    return;
+  gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+
+  // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
+  if (I == E ||
+      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
+    return MarkUnsafe(Info);
+  }
+
+  ++I;
+  if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??
+
+  bool IsAllZeroIndices = true;
+
   // If the first index is a non-constant index into an array, see if we can
   // handle it as a special case.
-  const Type *ArrayEltTy = 0;
-  if (ArrayOffset == 0 && Offset == 0) {
-    if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
-      if (!isa<ConstantInt>(GEPIt.getOperand())) {
-        uint64_t NumElements = AT->getNumElements();
-
-        // If this is an array index and the index is not constant, we cannot
-        // promote... that is unless the array has exactly one or two elements
-        // in it, in which case we CAN promote it, but we have to canonicalize
-        // this out if this is the only problem.
-        if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI))
-          return MarkUnsafe(Info);
+  if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
+    if (!isa<ConstantInt>(I.getOperand())) {
+      IsAllZeroIndices = 0;
+      uint64_t NumElements = AT->getNumElements();
+
+      // If this is an array index and the index is not constant, we cannot
+      // promote... that is unless the array has exactly one or two elements in
+      // it, in which case we CAN promote it, but we have to canonicalize this
+      // out if this is the only problem.
+      if ((NumElements == 1 || NumElements == 2) &&
+          AllUsersAreLoads(GEPI)) {
         Info.needsCleanup = true;
-        ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType());
-        ArrayEltTy = AT->getElementType();
-        ++GEPIt;
+        return;  // Canonicalization required!
       }
+      return MarkUnsafe(Info);
     }
   }
-
+
   // Walk through the GEP type indices, checking the types that this indexes
   // into.
-  for (; GEPIt != E; ++GEPIt) {
+  for (; I != E; ++I) {
     // Ignore struct elements, no extra checking needed for these.
-    if (isa<StructType>(*GEPIt))
+    if (isa<StructType>(*I))
       continue;
+
+    ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
+    if (!IdxVal) return MarkUnsafe(Info);
 
-    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
-    if (!IdxVal)
-      return MarkUnsafe(Info);
-
-    if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
+    // Are all indices still zero?
+    IsAllZeroIndices &= IdxVal->isZero();
+
+    if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
       // This GEP indexes an array.  Verify that this is an in-range constant
       // integer. Specifically, consider A[0][i]. We cannot know that the user
       // isn't doing invalid things like allowing i to index an out-of-range
@@ -526,255 +612,147 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
      // of any accesses into structs where any of the components are variables.
       if (IdxVal->getZExtValue() >= AT->getNumElements())
         return MarkUnsafe(Info);
-    } else {
-      const VectorType *VT = dyn_cast<VectorType>(*GEPIt);
-      assert(VT && "unexpected type in GEP type iterator");
+    } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
      if (IdxVal->getZExtValue() >= VT->getNumElements())
        return MarkUnsafe(Info);
     }
   }
-
-  // All the indices are safe.  Now compute the offset due to this GEP and
-  // check if the alloca has a component element at that offset.
-  if (ArrayOffset == 0) {
-    SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
-    Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
-                                   &Indices[0], Indices.size());
-  } else {
-    // Both array elements have the same type, so it suffices to check one of
-    // them.  Copy the GEP indices starting from the array index, but replace
-    // that variable index with a constant zero.
-    SmallVector<Value*, 8> Indices(GEPI->op_begin() + 2, GEPI->op_end());
-    Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
-    const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy);
-    Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size());
-  }
-  if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
-    MarkUnsafe(Info);
+
+  // If there are any non-simple uses of this getelementptr, make sure to reject
+  // them.
+  return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
 }
 
-/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
-/// alloca or has an offset and size that corresponds to a component element
-/// within it.  The offset checked here may have been formed from a GEP with a
-/// pointer bitcasted to a different type.
-void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset,
-                           uint64_t ArrayOffset, uint64_t MemSize,
-                           const Type *MemOpType, bool isStore,
-                           AllocaInfo &Info) {
-  // Check if this is a load/store of the entire alloca.
-  if (Offset == 0 && ArrayOffset == 0 &&
-      MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
-    bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
-    // This is safe for MemIntrinsics (where MemOpType is 0), integer types
-    // (which are essentially the same as the MemIntrinsics, especially with
-    // regard to copying padding between elements), or references using the
-    // aggregate type of the alloca.
-    if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) {
-      if (!UsesAggregateType) {
-        if (isStore)
-          Info.isMemCpyDst = true;
-        else
-          Info.isMemCpySrc = true;
+/// isSafeMemIntrinsicOnAllocation - Check if the specified memory
+/// intrinsic can be promoted by SROA.  At this point, we know that the operand
+/// of the memintrinsic is a pointer to the beginning of the allocation.
+void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
+                                          unsigned OpNo, AllocaInfo &Info) {
+  // If not constant length, give up.
+  ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+  if (!Length) return MarkUnsafe(Info);
+
+  // If not the whole aggregate, give up.
+  if (Length->getZExtValue() !=
+      TD->getTypeAllocSize(AI->getType()->getElementType()))
+    return MarkUnsafe(Info);
+
+  // We only know about memcpy/memset/memmove.
+  if (!isa<MemIntrinsic>(MI))
+    return MarkUnsafe(Info);
+
+  // Otherwise, we can transform it.  Determine whether this is a memcpy/set
+  // into or out of the aggregate.
+  if (OpNo == 1)
+    Info.isMemCpyDst = true;
+  else {
+    assert(OpNo == 2);
+    Info.isMemCpySrc = true;
+  }
+}
+
+/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast
+/// from an alloca are safe for SROA of that alloca.
+void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
+                                          AllocaInfo &Info) {
+  for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
+       UI != E; ++UI) {
+    if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
+      isSafeUseOfBitCastedAllocation(BCU, AI, Info);
+    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+      isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+      if (SI->isVolatile())
+        return MarkUnsafe(Info);
+
+      // If storing the entire alloca in one chunk through a bitcasted pointer
+      // to integer, we can transform it.  This happens (for example) when you
+      // cast a {i32,i32}* to i64* and store through it.  This is similar to the
+      // memcpy case and occurs in various "byval" cases and emulated memcpys.
+      if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
+          TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
+          TD->getTypeAllocSize(AI->getType()->getElementType())) {
+        Info.isMemCpyDst = true;
+        continue;
       }
-      return;
+      return MarkUnsafe(Info);
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
+      if (LI->isVolatile())
+        return MarkUnsafe(Info);
+
+      // If loading the entire alloca in one chunk through a bitcasted pointer
+      // to integer, we can transform it.  This happens (for example) when you
+      // cast a {i32,i32}* to i64* and load through it.  This is similar to the
+      // memcpy case and occurs in various "byval" cases and emulated memcpys.
+      if (isa<IntegerType>(LI->getType()) &&
+          TD->getTypeAllocSize(LI->getType()) ==
+          TD->getTypeAllocSize(AI->getType()->getElementType())) {
+        Info.isMemCpySrc = true;
+        continue;
+      }
+      return MarkUnsafe(Info);
+    } else if (isa<DbgInfoIntrinsic>(UI)) {
+      // If one user is DbgInfoIntrinsic then check if all users are
+      // DbgInfoIntrinsics.
+      if (OnlyUsedByDbgInfoIntrinsics(BC)) {
+        Info.needsCleanup = true;
+        return;
+      }
+      else
+        MarkUnsafe(Info);
     }
-  }
-  // Check if the offset/size correspond to a component within the alloca type.
-  const Type *T = AI->getAllocatedType();
-  if (TypeHasComponent(T, Offset, MemSize) &&
-      (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize)))
-    return;
-
-  return MarkUnsafe(Info);
-}
-
-/// TypeHasComponent - Return true if T has a component type with the
-/// specified offset and size.  If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
-  const Type *EltTy;
-  uint64_t EltSize;
-  if (const StructType *ST = dyn_cast<StructType>(T)) {
-    const StructLayout *Layout = TD->getStructLayout(ST);
-    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
-    EltTy = ST->getContainedType(EltIdx);
-    EltSize = TD->getTypeAllocSize(EltTy);
-    Offset -= Layout->getElementOffset(EltIdx);
-  } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
-    EltTy = AT->getElementType();
-    EltSize = TD->getTypeAllocSize(EltTy);
-    Offset %= EltSize;
-  } else {
-    return false;
-  }
-  if (Offset == 0 && (Size == 0 || EltSize == Size))
-    return true;
-  // Check if the component spans multiple elements.
-  if (Offset + Size > EltSize)
-    return false;
-  return TypeHasComponent(EltTy, Offset, Size);
-}
-
-/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
-/// the instruction I, which references it, to use the separate elements.
-/// Offset indicates the position within AI that is referenced by this
-/// instruction.
-void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
-                                SmallVector<AllocaInst*, 32> &NewElts) {
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
-    Instruction *User = cast<Instruction>(*UI);
-
-    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
-      RewriteBitCast(BC, AI, Offset, NewElts);
-    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
-      RewriteGEP(GEPI, AI, Offset, NewElts);
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
-      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
-      uint64_t MemSize = Length->getZExtValue();
-      if (Offset == 0 &&
-          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
-        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
-      const Type *LIType = LI->getType();
-      if (LIType == AI->getAllocatedType()) {
-        // Replace:
-        //   %res = load { i32, i32 }* %alloc
-        // with:
-        //   %load.0 = load i32* %alloc.0
-        //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
-        //   %load.1 = load i32* %alloc.1
-        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
-        // (Also works for arrays instead of structs)
-        Value *Insert = UndefValue::get(LIType);
-        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
-          Value *Load = new LoadInst(NewElts[i], "load", LI);
-          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
-        }
-        LI->replaceAllUsesWith(Insert);
-        DeadInsts.push_back(LI);
-      } else if (isa<IntegerType>(LIType) &&
-                 TD->getTypeAllocSize(LIType) ==
-                 TD->getTypeAllocSize(AI->getAllocatedType())) {
-        // If this is a load of the entire alloca to an integer, rewrite it.
-        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
-      }
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-      Value *Val = SI->getOperand(0);
-      const Type *SIType = Val->getType();
-      if (SIType == AI->getAllocatedType()) {
-        // Replace:
-        //   store { i32, i32 } %val, { i32, i32 }* %alloc
-        // with:
-        //   %val.0 = extractvalue { i32, i32 } %val, 0
-        //   store i32 %val.0, i32* %alloc.0
-        //   %val.1 = extractvalue { i32, i32 } %val, 1
-        //   store i32 %val.1, i32* %alloc.1
-        // (Also works for arrays instead of structs)
-        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
-          Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
-          new StoreInst(Extract, NewElts[i], SI);
-        }
-        DeadInsts.push_back(SI);
-      } else if (isa<IntegerType>(SIType) &&
-                 TD->getTypeAllocSize(SIType) ==
-                 TD->getTypeAllocSize(AI->getAllocatedType())) {
-        // If this is a store of the entire alloca from an integer, rewrite it.
-        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
-      }
+    else {
+      return MarkUnsafe(Info);
     }
+    if (Info.isUnsafe) return;
   }
 }
 
-/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
-/// and recursively continue updating all of its uses.
-void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
-                          SmallVector<AllocaInst*, 32> &NewElts) {
-  RewriteForScalarRepl(BC, AI, Offset, NewElts);
-  if (BC->getOperand(0) != AI)
-    return;
+/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
+/// to its first element.  Transform users of the cast to use the new values
+/// instead.
+void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
+                                      SmallVector<AllocaInst*, 32> &NewElts) {
+  Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
+  while (UI != UE) {
+    Instruction *User = cast<Instruction>(*UI++);
+    if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
+      RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
+      if (BCU->use_empty()) BCU->eraseFromParent();
+      continue;
+    }
 
-  // The bitcast references the original alloca.  Replace its uses with
-  // references to the first new element alloca.
-  Instruction *Val = NewElts[0];
-  if (Val->getType() != BC->getDestTy()) {
-    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
-    Val->takeName(BC);
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+      // This must be memcpy/memmove/memset of the entire aggregate.
+      // Split into one per element.
+      RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
+      continue;
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+      // If this is a store of the entire alloca from an integer, rewrite it.
+      RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
+      continue;
+    }
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      // If this is a load of the entire alloca to an integer, rewrite it.
+      RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
+      continue;
+    }
+
+    // Otherwise it must be some other user of a gep of the first pointer.  Just
+    // leave these alone.
+    continue;
   }
-  BC->replaceAllUsesWith(Val);
-  DeadInsts.push_back(BC);
-}
-
-/// FindElementAndOffset - Return the index of the element containing Offset
-/// within the specified type, which must be either a struct or an array.
-/// Sets T to the type of the element and Offset to the offset within that
-/// element.
-unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) {
-  unsigned Idx = 0;
-  if (const StructType *ST = dyn_cast<StructType>(T)) {
-    const StructLayout *Layout = TD->getStructLayout(ST);
-    Idx = Layout->getElementContainingOffset(Offset);
-    T = ST->getContainedType(Idx);
-    Offset -= Layout->getElementOffset(Idx);
-  } else {
-    const ArrayType *AT = dyn_cast<ArrayType>(T);
-    assert(AT && "unexpected type for scalar replacement");
-    T = AT->getElementType();
-    uint64_t EltSize = TD->getTypeAllocSize(T);
-    Idx = (unsigned)(Offset / EltSize);
-    Offset -= Idx * EltSize;
-  }
-  return Idx;
-}
-
-/// RewriteGEP - Check if this GEP instruction moves the pointer across
-/// elements of the alloca that are being split apart, and if so, rewrite
-/// the GEP to be relative to the new element.
-void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
-                      SmallVector<AllocaInst*, 32> &NewElts) {
-  uint64_t OldOffset = Offset;
-  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
-  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
-                                 &Indices[0], Indices.size());
-
-  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
-
-  const Type *T = AI->getAllocatedType();
-  unsigned OldIdx = FindElementAndOffset(T, OldOffset);
-  if (GEPI->getOperand(0) == AI)
-    OldIdx = ~0U; // Force the GEP to be rewritten.
-
-  T = AI->getAllocatedType();
-  uint64_t EltOffset = Offset;
-  unsigned Idx = FindElementAndOffset(T, EltOffset);
-
-  // If this GEP does not move the pointer across elements of the alloca
-  // being split, then it does not needs to be rewritten.
-  if (Idx == OldIdx)
-    return;
-
-  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
-  SmallVector<Value*, 8> NewArgs;
-  NewArgs.push_back(Constant::getNullValue(i32Ty));
-  while (EltOffset != 0) {
-    unsigned EltIdx = FindElementAndOffset(T, EltOffset);
-    NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx));
-  }
-  Instruction *Val = NewElts[Idx];
-  if (NewArgs.size() > 1) {
-    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
-                                            NewArgs.end(), "", GEPI);
-    Val->takeName(GEPI);
-  }
-  if (Val->getType() != GEPI->getType())
-    Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI);
-  GEPI->replaceAllUsesWith(Val);
-  DeadInsts.push_back(GEPI);
-}
+}
 
 /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
 /// Rewrite it to copy or set the elements of the scalarized memory.
-void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                         AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts) {
+
   // If this is a memcpy/memmove, construct the other pointer as the
   // appropriate type.  The "Other" pointer is the pointer that goes to memory
   // that doesn't have anything to do with the alloca that we are promoting. For
@@ -783,41 +761,28 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
   LLVMContext &Context = MI->getContext();
   unsigned MemAlignment = MI->getAlignment();
   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
-    if (Inst == MTI->getRawDest())
+    if (BCInst == MTI->getRawDest())
       OtherPtr = MTI->getRawSource();
     else {
-      assert(Inst == MTI->getRawSource());
+      assert(BCInst == MTI->getRawSource());
       OtherPtr = MTI->getRawDest();
     }
   }
 
+  // Keep track of the other intrinsic argument, so it can be removed if it
+  // is dead when the intrinsic is replaced.
+  Value *PossiblyDead = OtherPtr;
+
   // If there is an other pointer, we want to convert it to the same pointer
   // type as AI has, so we can GEP through it safely.
   if (OtherPtr) {
-
-    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
-    // optimization, but it's also required to detect the corner case where
-    // both pointer operands are referencing the same memory, and where
-    // OtherPtr may be a bitcast or GEP that currently being rewritten. (This
-    // function is only called for mem intrinsics that access the whole
-    // aggregate, so non-zero GEPs are not an issue here.)
-    while (1) {
-      if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
-        OtherPtr = BC->getOperand(0);
-        continue;
-      }
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
-        // All zero GEPs are effectively bitcasts.
-        if (GEP->hasAllZeroIndices()) {
-          OtherPtr = GEP->getOperand(0);
-          continue;
-        }
-      }
-      break;
-    }
-    // If OtherPtr has already been rewritten, this intrinsic will be dead.
-    if (OtherPtr == NewElts[0])
-      return;
+    // It is likely that OtherPtr is a bitcast, if so, remove it.
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
+      OtherPtr = BC->getOperand(0);
+    // All zero GEPs are effectively bitcasts.
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
+      if (GEP->hasAllZeroIndices())
+        OtherPtr = GEP->getOperand(0);
 
     if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
       if (BCE->getOpcode() == Instruction::BitCast)
@@ -833,7 +798,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
 
   // Process each element of the aggregate.
   Value *TheFn = MI->getOperand(0);
   const Type *BytePtrTy = MI->getRawDest()->getType();
-  bool SROADest = MI->getRawDest() == Inst;
+  bool SROADest = MI->getRawDest() == BCInst;
 
   Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
 
@@ -842,15 +807,12 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
     Value *OtherElt = 0;
     unsigned OtherEltAlign = MemAlignment;
 
-    if (OtherPtr == AI) {
-      OtherElt = NewElts[i];
-      OtherEltAlign = 0;
-    } else if (OtherPtr) {
+    if (OtherPtr) {
       Value *Idx[2] = { Zero,
                       ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
-      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+      OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
                                            OtherPtr->getNameStr()+"."+Twine(i),
-                                           MI);
+                                                MI);
       uint64_t EltOffset;
       const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
       if (const StructType *ST =
@@ -962,7 +924,9 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
         CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
       }
     }
-  DeadInsts.push_back(MI);
+  MI->eraseFromParent();
+  if (PossiblyDead)
+    RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead);
 }
 
 /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
@@ -973,9 +937,15 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   // Extract each element out of the integer according to its structure offset
   // and store the element value to the individual alloca.
   Value *SrcVal = SI->getOperand(0);
-  const Type *AllocaEltTy = AI->getAllocatedType();
+  const Type *AllocaEltTy = AI->getType()->getElementType();
   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
 
+  // If this isn't a store of an integer to the whole alloca, it may be a store
+  // to the first element.  Just ignore the store in this case and normal SROA
+  // will handle it.
+  if (!isa<IntegerType>(SrcVal->getType()) ||
+      TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+    return;
   // Handle tail padding by extending the operand
   if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
     SrcVal = new ZExtInst(SrcVal,
@@ -1080,7 +1050,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
     }
   }
 
-  DeadInsts.push_back(SI);
+  SI->eraseFromParent();
 }
 
 /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
@@ -1089,9 +1059,16 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts) {
   // Extract each element out of the NewElts according to its structure offset
   // and form the result value.
-  const Type *AllocaEltTy = AI->getAllocatedType();
+  const Type *AllocaEltTy = AI->getType()->getElementType();
   uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
 
+  // If this isn't a load of the whole alloca to an integer, it may be a load
+  // of the first element.  Just ignore the load in this case and normal SROA
+  // will handle it.
+  if (!isa<IntegerType>(LI->getType()) ||
+      TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
+    return;
+
   DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
                << '\n');
 
@@ -1162,9 +1139,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
     ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
 
   LI->replaceAllUsesWith(ResultVal);
-  DeadInsts.push_back(LI);
+  LI->eraseFromParent();
 }
 
+
 /// HasPadding - Return true if the specified type has any structure or
 /// alignment padding, false otherwise.
 static bool HasPadding(const Type *Ty, const TargetData &TD) {
@@ -1214,10 +1192,14 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
   // the users are safe to transform.
   AllocaInfo Info;
 
-  isSafeForScalarRepl(AI, AI, 0, 0, Info);
-  if (Info.isUnsafe) {
-    DEBUG(errs() << "Cannot transform: " << *AI << '\n');
-    return 0;
+  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
+       I != E; ++I) {
+    isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
+    if (Info.isUnsafe) {
+      DEBUG(errs() << "Cannot transform: " << *AI << "\n  due to user: "
+            << **I << '\n');
+      return 0;
+    }
   }
 
   // Okay, we know all the users are promotable.  If the aggregate is a memcpy
@@ -1226,7 +1208,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
   // types, but may actually be used.  In these cases, we refuse to promote the
   // struct.
   if (Info.isMemCpySrc && Info.isMemCpyDst &&
-      HasPadding(AI->getAllocatedType(), *TD))
+      HasPadding(AI->getType()->getElementType(), *TD))
     return 0;
 
   // If we require cleanup, return 1, otherwise return 3.
@@ -1263,15 +1245,15 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
   // Insert the new GEP instructions, which are properly indexed.
   SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
   Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
-  Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0),
-                                                     Indices.begin(),
-                                                     Indices.end(),
-                                                     GEPI->getName()+".0",GEPI);
+  Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
+                                             Indices.begin(),
+                                             Indices.end(),
+                                             GEPI->getName()+".0", GEPI);
   Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
-  Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0),
-                                                    Indices.begin(),
-                                                    Indices.end(),
-                                                    GEPI->getName()+".1", GEPI);
+  Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
+                                            Indices.begin(),
+                                            Indices.end(),
+                                            GEPI->getName()+".1", GEPI);
   // Replace all loads of the variable index GEP with loads from both
   // indexes and a select.
   while (!GEPI->use_empty()) {
@@ -1282,24 +1264,22 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
     LI->replaceAllUsesWith(R);
     LI->eraseFromParent();
   }
+  GEPI->eraseFromParent();
 }
+
 /// CleanupAllocaUsers - If SROA reported that it can promote the specified
 /// allocation, but only if cleaned up, perform the cleanups required.
-void SROA::CleanupAllocaUsers(Value *V) {
+void SROA::CleanupAllocaUsers(AllocaInst *AI) {
   // At this point, we know that the end result will be SROA'd and promoted, so
   // we can insert ugly code if required so long as sroa+mem2reg will clean it
   // up.
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
        UI != E; ) {
     User *U = *UI++;
-    if (isa<BitCastInst>(U)) {
-      CleanupAllocaUsers(U);
-    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
       CleanupGEP(GEPI);
-      CleanupAllocaUsers(GEPI);
-      if (GEPI->use_empty()) GEPI->eraseFromParent();
-    } else {
+    else {
       Instruction *I = cast<Instruction>(U);
       SmallVector<DbgInfoIntrinsic*, 2> DbgInUses;
       if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
@@ -1415,7 +1395,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
 
       // Compute the offset that this GEP adds to the pointer.
       SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
-      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
+      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
                                                 &Indices[0], Indices.size());
 
       // See if all uses can be converted.
       if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
@@ -1477,7 +1457,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
       // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
-      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
+      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
                                                 &Indices[0], Indices.size());
       ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
       GEP->eraseFromParent();
diff --git a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
deleted file mode 100644
index 71f66d67b94d..000000000000
--- a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ /dev/null
@@ -1,89 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; Radar 7441282
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
-
-%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
-%struct.int16x8_t = type { <8 x i16> }
-%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
-%union..0anon = type { %struct.int16x8x2_t }
-
-define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
-; CHECK: @test
-; CHECK-NOT: alloca
-; CHECK: "alloca point"
-entry:
-  %tmp_addr = alloca %struct.int16x8_t           ; <%struct.int16x8_t*> [#uses=3]
-  %dst_addr = alloca %struct.int16x8x2_t*        ; <%struct.int16x8x2_t**> [#uses=2]
-  %__rv = alloca %union..0anon                   ; <%union..0anon*> [#uses=2]
-  %__bx = alloca %struct.int16x8_t               ; <%struct.int16x8_t*> [#uses=2]
-  %__ax = alloca %struct.int16x8_t               ; <%struct.int16x8_t*> [#uses=2]
-  %tmp2 = alloca %struct.int16x8x2_t             ; <%struct.int16x8x2_t*> [#uses=2]
-  %0 = alloca %struct.int16x8x2_t                ; <%struct.int16x8x2_t*> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32         ; <i32> [#uses=0]
-  %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  store <8 x i16> %tmp.0, <8 x i16>* %1
-  store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
-  %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %4 = load <8 x i16>* %3, align 16              ; <<8 x i16>> [#uses=1]
-  store <8 x i16> %4, <8 x i16>* %2, align 16
-  %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %7 = load <8 x i16>* %6, align 16              ; <<8 x i16>> [#uses=1]
-  store <8 x i16> %7, <8 x i16>* %5, align 16
-  %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %9 = load <8 x i16>* %8, align 16              ; <<8 x i16>> [#uses=2]
-  %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %11 = load <8 x i16>* %10, align 16            ; <<8 x i16>> [#uses=2]
-  %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2]
-  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1]
-  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  store <8 x i16> %14, <8 x i16>* %15
-  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1]
-  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1]
-  store <8 x i16> %16, <8 x i16>* %17
-  %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %19 = bitcast %struct.int16x8x2_t* %0 to i8*   ; <i8*> [#uses=1]
-  %20 = bitcast %struct.int16x8x2_t* %18 to i8*  ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16)
-  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
-  %21 = bitcast %struct.int16x8x2_t* %0 to i8*   ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16)
-  %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1]
-  %23 = bitcast %struct.int16x8x2_t* %22 to i8*  ; <i8*> [#uses=1]
-  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16)
-  br label %return
-
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-
-return:                                          ; preds = %entry
-  ret void
-}
-
-; Radar 7466574
-%struct._NSRange = type { i64 }
-
-define arm_apcscc void @test_memcpy_self() nounwind {
-; CHECK: @test_memcpy_self
-; CHECK-NOT: alloca
-; CHECK: br i1
-entry:
-  %range = alloca %struct._NSRange               ; <%struct._NSRange*> [#uses=2]
-  br i1 undef, label %cond.true, label %cond.false
-
-cond.true:                                       ; preds = %entry
-  %tmp3 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
-  %tmp4 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
-  call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8)
-  ret void
-
-cond.false:                                      ; preds = %entry
-  ret void
-}
-
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
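
; ---------------------------------------------------------------------------
; Illustration only (not part of the patch): a minimal sketch of the rewrite
; that the restored DoScalarReplacement performs, written in the same 2009-era
; typed-pointer IR syntax as the deleted test above.  The struct type and all
; value names (%alloc, %res, etc.) are hypothetical, chosen to mirror the
; comment block inside DoScalarReplacement.
;
; Before scalar replacement, a whole-aggregate load of a two-element struct:
;
;   %alloc = alloca { i32, i32 }
;   %res = load { i32, i32 }* %alloc
;
; After the alloca is split into one alloca per element, the load is rebuilt
; one element at a time with insertvalue (the code uses undef as the initial
; aggregate value):
;
;   %alloc.0 = alloca i32
;   %alloc.1 = alloca i32
;   %load.0 = load i32* %alloc.0
;   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
;   %load.1 = load i32* %alloc.1
;   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
;
; Whole-aggregate stores are handled symmetrically with extractvalue plus one
; store per element, and a memcpy/memset of the entire aggregate is split into
; one transfer per element by RewriteMemIntrinUserOfAlloca.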