[CUDA] Set LLVM calling convention for CUDA kernel

Some targets need special LLVM calling convention for CUDA kernel.
This patch does that through a TargetCodeGenInfo hook.

It only affects amdgcn target.

Patch by Greg Rodgers.
Revised and lit tests added by Yaxun Liu.

Differential Revision: https://reviews.llvm.org/D45223

llvm-svn: 330447
This commit is contained in:
Yaxun Liu 2018-04-20 17:01:03 +00:00
parent e268304122
commit 4306f2086f
4 changed files with 52 additions and 0 deletions

View File

@ -3627,6 +3627,9 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
MaybeHandleStaticInExternC(D, Fn);
if (D->hasAttr<CUDAGlobalAttr>())
getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
maybeSetTrivialComdat(*D, *Fn);
CodeGenFunction(*this).GenerateCode(D, Fn, FI);

View File

@ -7637,6 +7637,7 @@ public:
llvm::Function *BlockInvokeFunc,
llvm::Value *BlockLiteral) const override;
bool shouldEmitStaticExternCAliases() const override;
void setCUDAKernelCallingConvention(llvm::Function *F) const override;
};
}
@ -7772,6 +7773,11 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
llvm::Function *F) const {
F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
}
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.

View File

@ -301,6 +301,8 @@ public:
/// mangled name of functions declared within an extern "C" region and marked
/// as 'used', and having internal linkage.
virtual bool shouldEmitStaticExternCAliases() const { return true; }
virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
};
} // namespace CodeGen

View File

@ -0,0 +1,41 @@
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
#include "Inputs/cuda.h"
// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
class A {
public:
static __global__ void kernel(){}
};
// CHECK: define void @_Z10non_kernelv
__device__ void non_kernel(){}
// CHECK: define amdgpu_kernel void @_Z6kerneli
__global__ void kernel(int x) {
non_kernel();
}
// CHECK: define amdgpu_kernel void @_Z11EmptyKernelIvEvv
template <typename T>
__global__ void EmptyKernel(void) {}
struct Dummy {
/// Type definition of the EmptyKernel kernel entry point
typedef void (*EmptyKernelPtr)();
EmptyKernelPtr Empty() { return EmptyKernel<void>; }
};
// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
template<class T>
__global__ void template_kernel(T x) {}
void launch(void *f);
int main() {
Dummy D;
launch((void*)A::kernel);
launch((void*)kernel);
launch((void*)template_kernel<A>);
launch((void*)D.Empty());
return 0;
}