Allow compatible extension attributes for tail calls

If the tail-callee and caller give the same bits via the same signext/zeroext
attribute then a tail-call should be allowed, since the extension has already
been done by the callee.

llvm-svn: 188159
This commit is contained in:
Tim Northover 2013-08-12 09:45:46 +00:00
parent c9b7d47b21
commit 707d68f082
3 changed files with 95 additions and 17 deletions

View File

@ -320,6 +320,7 @@ static const Value *getNoopInput(const Value *V,
static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
SmallVectorImpl<unsigned> &RetIndices,
SmallVectorImpl<unsigned> &CallIndices,
bool AllowDifferingSizes,
const TargetLoweringBase &TLI) {
// Trace the sub-value needed by the return value as far back up the graph as
@ -350,7 +351,8 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
// all the bits that are needed by the "ret" have been provided by the "tail
// call". FIXME: with sufficiently cunning bit-tracking, we could look through
// extensions too.
if (BitsProvided < BitsRequired)
if (BitsProvided < BitsRequired ||
(!AllowDifferingSizes && BitsProvided != BitsRequired))
return false;
return true;
@ -516,19 +518,38 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
// return type is.
if (isa<UndefValue>(Ret->getOperand(0))) return true;
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
AttributeSet CallerAttrs = F->getAttributes();
if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
removeAttribute(Attribute::NoAlias) !=
AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
removeAttribute(Attribute::NoAlias))
return false;
// Make sure the attributes attached to each return are compatible.
AttrBuilder CallerAttrs(ExitBB->getParent()->getAttributes(),
AttributeSet::ReturnIndex);
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
AttributeSet::ReturnIndex);
// It's not safe to eliminate the sign / zero extension of the return value.
if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
// Noalias is completely benign as far as calling convention goes, it
// shouldn't affect whether the call is a tail call.
CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);
bool AllowDifferingSizes = true;
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
return false;
AllowDifferingSizes = false;
CallerAttrs.removeAttribute(Attribute::ZExt);
CalleeAttrs.removeAttribute(Attribute::ZExt);
} else if (CallerAttrs.contains(Attribute::SExt)) {
if (!CalleeAttrs.contains(Attribute::SExt))
return false;
AllowDifferingSizes = false;
CallerAttrs.removeAttribute(Attribute::SExt);
CalleeAttrs.removeAttribute(Attribute::SExt);
}
// If they're still different, there's some facet we don't understand
// (currently only "inreg", but in future who knows). It may be OK but the
// only safe option is to reject the tail call.
if (CallerAttrs != CalleeAttrs)
return false;
const Value *RetVal = Ret->getOperand(0), *CallVal = I;
@ -570,7 +591,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, TLI))
if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
AllowDifferingSizes, TLI))
return false;
CallEmpty = !nextRealType(CallSubTypes, CallPath);

View File

@ -106,10 +106,10 @@ declare i32 @bar2(i32, i32, i32)
define signext i16 @t8() nounwind ssp {
entry:
; 32-LABEL: t8:
; 32: calll {{_?}}bar3
; 32: jmp {{_?}}bar3
; 64-LABEL: t8:
; 64: callq {{_?}}bar3
; 64: jmp {{_?}}bar3
%0 = tail call signext i16 @bar3() nounwind ; <i16> [#uses=1]
ret i16 %0
}
@ -122,7 +122,7 @@ entry:
; 32: calll *
; 64-LABEL: t9:
; 64: callq *
; 64: jmpq *
%0 = bitcast i32 (i32)* %x to i16 (i32)*
%1 = tail call signext i16 %0(i32 0) nounwind
ret i16 %1

View File

@ -0,0 +1,56 @@
; RUN: llc -mtriple=x86_64-apple-darwin -o - %s | FileCheck %s
; Simple case: completely identical returns, even with extensions, shouldn't be
; a barrier to tail calls.
declare zeroext i1 @give_bool()
define zeroext i1 @test_bool() {
; CHECK-LABEL: test_bool:
; CHECK: jmp
%call = tail call zeroext i1 @give_bool()
ret i1 %call
}
; Here, there's more zero extension to be done between the call and the return,
; so a tail call is impossible (well, according to current Clang practice
; anyway. The AMD64 ABI isn't crystal clear on the matter).
declare zeroext i32 @give_i32()
define zeroext i8 @test_i32() {
; CHECK-LABEL: test_i32:
; CHECK: callq _give_i32
; CHECK: movzbl %al, %eax
; CHECK: ret
%call = tail call zeroext i32 @give_i32()
%val = trunc i32 %call to i8
ret i8 %val
}
; Here, one function is zeroext and the other is signext. To the extent that
; these both mean something they are incompatible so no tail call is possible.
declare zeroext i16 @give_unsigned_i16()
define signext i16 @test_incompatible_i16() {
; CHECK-LABEL: test_incompatible_i16:
; CHECK: callq _give_unsigned_i16
; CHECK: cwtl
; CHECK: ret
%call = tail call zeroext i16 @give_unsigned_i16()
ret i16 %call
}
declare inreg i32 @give_i32_inreg()
define i32 @test_inreg_to_normal() {
; CHECK-LABEL: test_inreg_to_normal:
; CHECK: callq _give_i32_inreg
; CHECK: ret
%val = tail call inreg i32 @give_i32_inreg()
ret i32 %val
}
define inreg i32 @test_normal_to_inreg() {
; CHECK-LABEL: test_normal_to_inreg:
; CHECK: callq _give_i32
; CHECK: ret
%val = tail call i32 @give_i32()
ret i32 %val
}