When eliding a byval copy due to inlining a readonly function, we have
to make sure that the reused alloca has sufficient alignment. llvm-svn: 122236
This commit is contained in:
parent
0099744506
commit
0f11495289
|
@ -229,17 +229,56 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
|
||||||
CallerNode->removeCallEdgeFor(CS);
|
CallerNode->removeCallEdgeFor(CS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// HandleByValArgument - When inlining a call site that has a byval argument,
|
||||||
|
/// we have to make the implicit memcpy explicit by adding it.
|
||||||
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
|
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
|
||||||
const Function *CalledFunc,
|
const Function *CalledFunc,
|
||||||
InlineFunctionInfo &IFI,
|
InlineFunctionInfo &IFI,
|
||||||
unsigned ByValAlignment) {
|
unsigned ByValAlignment) {
|
||||||
if (CalledFunc->onlyReadsMemory())
|
const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
|
||||||
|
|
||||||
|
// If the called function is readonly, then it could not mutate the caller's
|
||||||
|
// copy of the byval'd memory. In this case, it is safe to elide the copy and
|
||||||
|
// temporary.
|
||||||
|
if (CalledFunc->onlyReadsMemory()) {
|
||||||
|
// If the byval argument has a specified alignment that is greater than the
|
||||||
|
// passed in pointer, then we either have to round up the input pointer or
|
||||||
|
// give up on this transformation.
|
||||||
|
if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
|
||||||
return Arg;
|
return Arg;
|
||||||
|
|
||||||
|
// See if the argument is a (bitcasted) pointer to an alloca. If so, we can
|
||||||
|
// round up the alloca if needed.
|
||||||
|
if (AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts())) {
|
||||||
|
unsigned AIAlign = AI->getAlignment();
|
||||||
|
|
||||||
|
// If the alloca is known to be at least as aligned as the byval, we can do
|
||||||
|
// this optimization.
|
||||||
|
if (AIAlign >= ByValAlignment)
|
||||||
|
return Arg;
|
||||||
|
|
||||||
|
// If the alloca has a specified alignment that is less than the byval,
|
||||||
|
// then we can safely bump it up.
|
||||||
|
if (AIAlign) {
|
||||||
|
AI->setAlignment(ByValAlignment);
|
||||||
|
return Arg;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the alloca has an unspecified alignment, then we can only modify
|
||||||
|
// it if we have TD information. Doing so without TD info could end up
|
||||||
|
// with us rounding the alignment *down* accidentally, which is badness.
|
||||||
|
if (IFI.TD) {
|
||||||
|
AIAlign = std::max(ByValAlignment, IFI.TD->getPrefTypeAlignment(AggTy));
|
||||||
|
AI->setAlignment(AIAlign);
|
||||||
|
return Arg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, we have to make a memcpy to get a safe alignment, pretty lame.
|
||||||
|
}
|
||||||
|
|
||||||
LLVMContext &Context = Arg->getContext();
|
LLVMContext &Context = Arg->getContext();
|
||||||
|
|
||||||
|
|
||||||
const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
|
|
||||||
const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
|
const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
|
||||||
|
|
||||||
// Create the alloca. If we have TargetData, use nice alignment.
|
// Create the alloca. If we have TargetData, use nice alignment.
|
||||||
|
|
|
@ -81,3 +81,26 @@ entry:
|
||||||
; CHECK: call void @g3(%struct.ss* %S1)
|
; CHECK: call void @g3(%struct.ss* %S1)
|
||||||
; CHECK: ret void
|
; CHECK: ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Inlining a byval struct should NOT cause an explicit copy
|
||||||
|
; into an alloca if the function is readonly, but should increase an alloca's
|
||||||
|
; alignment to satisfy an explicit alignment request.
|
||||||
|
|
||||||
|
define internal i32 @f4(%struct.ss* byval align 64 %b) nounwind readonly {
|
||||||
|
call void @g3(%struct.ss* %b)
|
||||||
|
ret i32 4
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @test4() nounwind {
|
||||||
|
entry:
|
||||||
|
%S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4]
|
||||||
|
%X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind
|
||||||
|
ret i32 %X
|
||||||
|
; CHECK: @test4()
|
||||||
|
; CHECK: %S = alloca %struct.ss, align 64
|
||||||
|
; CHECK-NOT: call void @llvm.memcpy
|
||||||
|
; CHECK: call void @g3
|
||||||
|
; CHECK: ret i32 4
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue