diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 23910a350e86..8320fc1b8822 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2665,15 +2665,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass.push_back(std::make_pair(unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy()))); } else { - // If we are tail calling a global or external symbol in GOT pic mode, we - // cannot use a direct jump, since that would make lazy dynamic linking - // impossible (see PR15086). So pretend this is not a tail call, to - // prevent the optimization to a jump. + // If we are tail calling and generating PIC/GOT style code, load the + // address of the callee into ECX. The value in ECX is used as target of + // the tail jump. This is done to circumvent the EBX/callee-saved problem + // for tail calls on PIC/GOT architectures. Normally we would just put the + // address of GOT into EBX and then call target@PLT. But for tail calls + // EBX would be restored (since EBX is callee-saved) before jumping to the + // target@PLT. + + // Note: The actual move into ECX is done further down. 
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); - if ((G && !G->getGlobal()->hasHiddenVisibility() && - !G->getGlobal()->hasProtectedVisibility()) || - isa<ExternalSymbolSDNode>(Callee)) - isTailCall = false; + if (G && !G->getGlobal()->hasHiddenVisibility() && + !G->getGlobal()->hasProtectedVisibility()) + Callee = LowerGlobalAddress(Callee, DAG); + else if (isa<ExternalSymbolSDNode>(Callee)) + Callee = LowerExternalSymbol(Callee, DAG); } } diff --git a/llvm/test/CodeGen/X86/tail-call-got.ll b/llvm/test/CodeGen/X86/tail-call-got.ll index bdfdeb59870f..84d561dcd8c3 100644 --- a/llvm/test/CodeGen/X86/tail-call-got.ll +++ b/llvm/test/CodeGen/X86/tail-call-got.ll @@ -5,7 +5,8 @@ target triple = "i386-unknown-freebsd9.0" define double @test1(double %x) nounwind readnone { ; CHECK-LABEL: test1: -; CHECK: calll foo@PLT +; CHECK: movl foo@GOT +; CHECK-NEXT: jmpl %1 = tail call double @foo(double %x) nounwind readnone ret double %1 } @@ -14,7 +15,8 @@ declare double @foo(double) readnone define double @test2(double %x) nounwind readnone { ; CHECK-LABEL: test2: -; CHECK: calll sin@PLT +; CHECK: movl sin@GOT +; CHECK-NEXT: jmpl %1 = tail call double @sin(double %x) nounwind readnone ret double %1 } diff --git a/llvm/test/CodeGen/X86/tailcallpic2.ll b/llvm/test/CodeGen/X86/tailcallpic2.ll index c35cee3a987b..1b6bdb769861 100644 --- a/llvm/test/CodeGen/X86/tailcallpic2.ll +++ b/llvm/test/CodeGen/X86/tailcallpic2.ll @@ -9,7 +9,7 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { entry: %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] ret i32 %tmp11 -; Note that this call via PLT could be further optimized into a direct call (no GOT, no PLT): -; CHECK: calll tailcallee@PLT +; CHECK: movl tailcallee@GOT +; CHECK: jmpl }