diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 23684e4dd2b3..241e0cdfc14d 100644 --- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -120,7 +120,8 @@ namespace { void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, AllocationInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - + void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI, + SmallVector<AllocaInst*, 32> &NewElts); const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial); void ConvertToScalar(AllocationInst *AI, const Type *Ty); @@ -586,6 +587,18 @@ void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI, isSafeUseOfBitCastedAllocation(BCU, AI, Info); } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); + } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { + // If storing the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and store through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. + if (isa<IntegerType>(SI->getOperand(0)->getType()) && + TD->getABITypeSize(SI->getOperand(0)->getType()) == + TD->getABITypeSize(AI->getType()->getElementType())) { + Info.isMemCpyDst = true; + continue; + } + return MarkUnsafe(Info); } else { return MarkUnsafe(Info); } @@ -603,7 +616,7 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, Instruction *User = cast<Instruction>(*UI++); if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) { RewriteBitCastUserOfAlloca(BCU, AI, NewElts); - BCU->eraseFromParent(); + if (BCU->use_empty()) BCU->eraseFromParent(); continue; } @@ -611,12 +624,17 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, // This must be memcpy/memmove/memset of the entire aggregate. 
// Split into one per element. RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); - MI->eraseFromParent(); continue; } - // If it's not a mem intrinsic, it must be some other user of a gep of the - // first pointer. Just leave these alone. + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // This must be a store of the entire alloca from an integer. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + continue; + } + + // Otherwise it must be some other user of a gep of the first pointer. Just + // leave these alone. continue; } } @@ -772,8 +790,118 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } } + MI->eraseFromParent(); } + +/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that +/// overwrites the entire allocation. Extract out the pieces of the stored +/// integer and store them individually. +void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, +  AllocationInst *AI, + SmallVector<AllocaInst*, 32> &NewElts){ + // Extract each element out of the integer according to its structure offset + // and store the element value to the individual alloca. + Value *SrcVal = SI->getOperand(0); + const Type *AllocaEltTy = AI->getType()->getElementType(); + uint64_t AllocaSizeBits = TD->getABITypeSizeInBits(AllocaEltTy); + // If this isn't a store of an integer to the whole alloca, it may be a store + // to the first element. Just ignore the store in this case and normal SROA + // will handle it. + if (!isa<IntegerType>(SrcVal->getType()) || + TD->getABITypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) + return; + + DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; + + // There are two forms here: AI could be an array or struct. Both cases + // have different ways to compute the element offset. 
+ if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + const StructLayout *Layout = TD->getStructLayout(EltSTy); + + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + // Get the number of bits to shift SrcVal to get the value. + const Type *FieldTy = EltSTy->getElementType(i); + uint64_t Shift = Layout->getElementOffsetInBits(i); + + if (TD->isBigEndian()) + Shift = AllocaSizeBits-Shift-TD->getABITypeSizeInBits(FieldTy); + + Value *EltVal = SrcVal; + if (Shift) { + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); + EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, + "sroa.store.elt", SI); + } + + // Truncate down to an integer of the right size. + uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); + if (FieldSizeBits != AllocaSizeBits) + EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI); + Value *DestField = NewElts[i]; + if (EltVal->getType() == FieldTy) { + // Storing to an integer field of this size, just do it. + } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { + // Bitcast to the right element type (for fp/vector values). + EltVal = new BitCastInst(EltVal, FieldTy, "", SI); + } else { + // Otherwise, bitcast the dest pointer (for aggregates). 
+ DestField = new BitCastInst(DestField, + PointerType::getUnqual(EltVal->getType()), + "", SI); + } + new StoreInst(EltVal, DestField, SI); + } + + } else { + const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); + const Type *ArrayEltTy = ATy->getElementType(); + uint64_t ElementOffset = TD->getABITypeSizeInBits(ArrayEltTy); + uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); + + uint64_t Shift; + + if (TD->isBigEndian()) + Shift = AllocaSizeBits-ElementOffset; + else + Shift = 0; + + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + + Value *EltVal = SrcVal; + if (Shift) { + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); + EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, + "sroa.store.elt", SI); + } + + // Truncate down to an integer of the right size. + if (ElementSizeBits != AllocaSizeBits) + EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI); + Value *DestField = NewElts[i]; + if (EltVal->getType() == ArrayEltTy) { + // Storing to an integer field of this size, just do it. + } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { + // Bitcast to the right element type (for fp/vector values). + EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); + } else { + // Otherwise, bitcast the dest pointer (for aggregates). + DestField = new BitCastInst(DestField, + PointerType::getUnqual(EltVal->getType()), + "", SI); + } + new StoreInst(EltVal, DestField, SI); + + if (TD->isBigEndian()) + Shift -= ElementOffset; + else + Shift += ElementOffset; + } + } + + SI->eraseFromParent(); +} + /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. 
diff --git a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll new file mode 100644 index 000000000000..c3685d093077 --- /dev/null +++ b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca +; PR3290 + +;; Store of integer to whole alloca struct. +define i32 @test1(i64 %V) nounwind { + %X = alloca {{i32, i32}} + %Y = bitcast {{i32,i32}}* %X to i64* + store i64 %V, i64* %Y + + %A = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 0 + %B = getelementptr {{i32,i32}}* %X, i32 0, i32 0, i32 1 + %a = load i32* %A + %b = load i32* %B + %c = add i32 %a, %b + ret i32 %c +} + +;; Store of integer to whole struct/array alloca. +define float @test2(i128 %V) nounwind { + %X = alloca {[4 x float]} + %Y = bitcast {[4 x float]}* %X to i128* + store i128 %V, i128* %Y + + %A = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 0 + %B = getelementptr {[4 x float]}* %X, i32 0, i32 0, i32 3 + %a = load float* %A + %b = load float* %B + %c = add float %a, %b + ret float %c +} +