[CUDA] Allow function overloads in CUDA based on host/device attributes.

The patch makes it possible to parse CUDA files that contain host/device
functions with identical signatures, but different attributes without
having to physically split source into host-only and device-only parts.

This change is needed in order to parse CUDA header files that have
a lot of name clashes with standard include files.

Gory details are in design doc here: https://goo.gl/EXnymm
Feel free to leave comments there or in this review thread.

This feature is controlled with CC1 option -fcuda-target-overloads
and is disabled by default.

Differential Revision: http://reviews.llvm.org/D12453

llvm-svn: 248295
This commit is contained in:
Artem Belevich 2015-09-22 17:22:59 +00:00
parent 81616a72ea
commit 94a55e8169
10 changed files with 757 additions and 1 deletions

View File

@ -166,6 +166,7 @@ LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device")
LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions")
LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)")
LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes")
LANGOPT(CUDAUsesLibDevice , 1, 0, "Selectively link and internalize bitcode.")
LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")

View File

@ -659,6 +659,8 @@ def fcuda_disable_target_call_checks : Flag<["-"],
HelpText<"Disable all cross-target (host, device, etc.) call checks in CUDA">;
def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
HelpText<"Incorporate CUDA device-side binary into host object file.">;
def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">,
HelpText<"Enable function overloads based on CUDA target attributes.">;
def fcuda_uses_libdevice : Flag<["-"], "fcuda-uses-libdevice">,
HelpText<"Selectively link and internalize bitcode.">;

View File

@ -8613,8 +8613,37 @@ public:
CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);
enum CUDAFunctionPreference {
CFP_Never, // Invalid caller/callee combination.
CFP_LastResort, // Lowest priority. Only in effect if
// LangOpts.CUDADisableTargetCallChecks is true.
CFP_Fallback, // Low priority caller/callee combination
CFP_Best, // Preferred caller/callee combination
};
/// Identifies relative preference of a given Caller/Callee
/// combination, based on their host/device attributes.
/// \param Caller function which needs address of \p Callee.
/// nullptr in case of global context.
/// \param Callee target function
///
/// \returns preference value for particular Caller/Callee combination.
CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
const FunctionDecl *Callee);
bool CheckCUDATarget(const FunctionDecl *Caller, const FunctionDecl *Callee);
/// Finds a function in \p Matches with highest calling priority
/// from \p Caller context and erases all functions with lower
/// calling priority.
void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
SmallVectorImpl<FunctionDecl *> &Matches);
void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
SmallVectorImpl<DeclAccessPair> &Matches);
void EraseUnwantedCUDAMatches(
const FunctionDecl *Caller,
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
/// Given a implicit special member, infer its CUDA target from the
/// calls it needs to make to underlying base/field special members.
/// \param ClassDecl the class for which the member is being created.

View File

@ -1416,6 +1416,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (Args.hasArg(OPT_fcuda_disable_target_call_checks))
Opts.CUDADisableTargetCallChecks = 1;
if (Args.hasArg(OPT_fcuda_target_overloads))
Opts.CUDATargetOverloads = 1;
if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue();

View File

@ -60,8 +60,101 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
return CFT_Host;
}
// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H - handled in (x)
// Preferences: b-best, f-fallback, l-last resort, n-never.
//
// | F | T | Ph | Pd | H |
// |----+----+----+----+-----+
// | d | d | b | b | (b) |
// | d | g | n | n | (a) |
// | d | h | l | l | (e) |
// | d | hd | f | f | (c) |
// | g | d | b | b | (b) |
// | g | g | n | n | (a) |
// | g | h | l | l | (e) |
// | g | hd | f | f | (c) |
// | h | d | l | l | (e) |
// | h | g | b | b | (b) |
// | h | h | b | b | (b) |
// | h | hd | f | f | (c) |
// | hd | d | l | f | (d) |
// | hd | g | f | n |(d/a)|
// | hd | h | f | l | (d) |
// | hd | hd | b | b | (b) |
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
const FunctionDecl *Callee) {
assert(getLangOpts().CUDATargetOverloads &&
"Should not be called w/o enabled target overloads.");
assert(Callee && "Callee must be valid.");
CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
CUDAFunctionTarget CallerTarget =
(Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
// If one of the targets is invalid, the check always fails, no matter what
// the other target is.
if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
return CFP_Never;
// (a) Can't call global from some contexts until we support CUDA's
// dynamic parallelism.
if (CalleeTarget == CFT_Global &&
(CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
(CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
return CFP_Never;
// (b) Best case scenarios
if (CalleeTarget == CallerTarget ||
(CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
return CFP_Best;
// (c) Calling HostDevice is OK as a fallback that works for everyone.
if (CalleeTarget == CFT_HostDevice)
return CFP_Fallback;
// Figure out what should be returned 'last resort' cases. Normally
// those would not be allowed, but we'll consider them if
// CUDADisableTargetCallChecks is true.
CUDAFunctionPreference QuestionableResult =
getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
// (d) HostDevice behavior depends on compilation mode.
if (CallerTarget == CFT_HostDevice) {
// Calling a function that matches compilation mode is OK.
// Calling a function from the other side is frowned upon.
if (getLangOpts().CUDAIsDevice)
return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
else
return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
? CFP_Fallback
: QuestionableResult;
}
// (e) Calling across device/host boundary is not something you should do.
if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
(CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
return QuestionableResult;
llvm_unreachable("All cases should've been handled by now.");
}
bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
const FunctionDecl *Callee) {
// With target overloads enabled, we only disallow calling
// combinations with CFP_Never.
if (getLangOpts().CUDATargetOverloads)
return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
// The CUDADisableTargetCallChecks short-circuits this check: we assume all
// cross-target calls are valid.
if (getLangOpts().CUDADisableTargetCallChecks)
@ -117,6 +210,57 @@ bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
return false;
}
template <typename T, typename FetchDeclFn>
static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
llvm::SmallVectorImpl<T> &Matches,
FetchDeclFn FetchDecl) {
assert(S.getLangOpts().CUDATargetOverloads &&
"Should not be called w/o enabled target overloads.");
if (Matches.size() <= 1)
return;
// Find the best call preference among the functions in Matches.
Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
for (auto const &Match : Matches) {
P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
if (P > BestCFP)
BestCFP = P;
}
// Erase all functions with lower priority.
for (unsigned I = 0, N = Matches.size(); I != N;)
if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
Matches[I] = Matches[--N];
Matches.resize(N);
} else {
++I;
}
}
void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
SmallVectorImpl<FunctionDecl *> &Matches){
EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
*this, Caller, Matches, [](const FunctionDecl *item) { return item; });
}
void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
SmallVectorImpl<DeclAccessPair> &Matches) {
EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
*this, Caller, Matches, [](const DeclAccessPair &item) {
return dyn_cast<FunctionDecl>(item.getDecl());
});
}
void Sema::EraseUnwantedCUDAMatches(
const FunctionDecl *Caller,
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
*this, Caller, Matches,
[](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
return dyn_cast<FunctionDecl>(item.second);
});
}
/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member __host__ and another's is

View File

@ -5515,6 +5515,12 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) {
// In C++, the overloadable attribute negates the effects of extern "C".
if (!D->isInExternCContext() || D->template hasAttr<OverloadableAttr>())
return false;
// So do CUDA's host/device attributes if overloading is enabled.
if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
(D->template hasAttr<CUDADeviceAttr>() ||
D->template hasAttr<CUDAHostAttr>()))
return false;
}
return D->isExternC();
}

View File

@ -2265,6 +2265,9 @@ FunctionDecl *Sema::FindUsualDeallocationFunction(SourceLocation StartLoc,
"found an unexpected usual deallocation function");
}
if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
assert(Matches.size() == 1 &&
"unexpectedly have multiple usual deallocation functions");
return Matches.front();
@ -2296,6 +2299,9 @@ bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD,
Matches.push_back(F.getPair());
}
if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
// There's exactly one suitable operator; pick it.
if (Matches.size() == 1) {
Operator = cast<CXXMethodDecl>(Matches[0]->getUnderlyingDecl());

View File

@ -1072,6 +1072,25 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
return true;
}
if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) {
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
OldTarget = IdentifyCUDATarget(Old);
if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
return false;
assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target.");
// Don't allow mixing of HD with other kinds. This guarantees that
// we have only one viable function with this signature on any
// side of CUDA compilation .
if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice))
return false;
// Allow overloading of functions with same signature, but
// different CUDA target attributes.
return NewTarget != OldTarget;
}
// The signatures match; this is not an overload.
return false;
}
@ -8508,6 +8527,13 @@ bool clang::isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,
return true;
}
if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
Cand1.Function && Cand2.Function) {
FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
S.IdentifyCUDAPreference(Caller, Cand2.Function);
}
return false;
}
@ -9925,6 +9951,10 @@ public:
EliminateAllExceptMostSpecializedTemplate();
}
}
if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
Matches.size() > 1)
EliminateSuboptimalCudaMatches();
}
private:
@ -10100,11 +10130,15 @@ private:
++I;
else {
Matches[I] = Matches[--N];
Matches.set_size(N);
Matches.resize(N);
}
}
}
void EliminateSuboptimalCudaMatches() {
S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), Matches);
}
public:
void ComplainNoMatchesFound() const {
assert(Matches.empty());

View File

@ -0,0 +1,214 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// Make sure we handle target overloads correctly.
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
// Check target overloads handling with disabled call target checks.
// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \
// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s
// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \
// RUN: -fcuda-is-device -o - %s \
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s
#include "Inputs/cuda.h"
typedef int (*fp_t)(void);
typedef void (*gp_t)(void);
// CHECK-HOST: @hp = global i32 ()* @_Z1hv
// CHECK-HOST: @chp = global i32 ()* @ch
// CHECK-HOST: @dhp = global i32 ()* @_Z2dhv
// CHECK-HOST: @cdhp = global i32 ()* @cdh
// CHECK-HOST: @gp = global void ()* @_Z1gv
// CHECK-BOTH-LABEL: define i32 @_Z2dhv()
__device__ int dh(void) { return 1; }
// CHECK-DEVICE: ret i32 1
__host__ int dh(void) { return 2; }
// CHECK-HOST: ret i32 2
// CHECK-BOTH-LABEL: define i32 @_Z2hdv()
__host__ __device__ int hd(void) { return 3; }
// CHECK-BOTH: ret i32 3
// CHECK-DEVICE-LABEL: define i32 @_Z1dv()
__device__ int d(void) { return 8; }
// CHECK-DEVICE: ret i32 8
// CHECK-HOST-LABEL: define i32 @_Z1hv()
__host__ int h(void) { return 9; }
// CHECK-HOST: ret i32 9
// CHECK-BOTH-LABEL: define void @_Z1gv()
__global__ void g(void) {}
// CHECK-BOTH: ret void
// mangled names of extern "C" __host__ __device__ functions clash
// with those of their __host__/__device__ counterparts, so
// overloading of extern "C" functions can only happen for __host__
// and __device__ functions -- we never codegen them in the same
// compilation and therefore mangled name conflict is not a problem.
// CHECK-BOTH-LABEL: define i32 @cdh()
extern "C" __device__ int cdh(void) {return 10;}
// CHECK-DEVICE: ret i32 10
extern "C" __host__ int cdh(void) {return 11;}
// CHECK-HOST: ret i32 11
// CHECK-DEVICE-LABEL: define i32 @cd()
extern "C" __device__ int cd(void) {return 12;}
// CHECK-DEVICE: ret i32 12
// CHECK-HOST-LABEL: define i32 @ch()
extern "C" __host__ int ch(void) {return 13;}
// CHECK-HOST: ret i32 13
// CHECK-BOTH-LABEL: define i32 @chd()
extern "C" __host__ __device__ int chd(void) {return 14;}
// CHECK-BOTH: ret i32 14
// CHECK-HOST-LABEL: define void @_Z5hostfv()
__host__ void hostf(void) {
#if defined (NOCHECKS)
fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
fp_t cdp = cd; // CHECK-HOST-NC: store {{.*}} @cd, {{.*}} %cdp,
#endif
fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp,
fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp,
fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp,
fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp,
fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,
gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp,
#if defined (NOCHECKS)
d(); // CHECK-HOST-NC: call i32 @_Z1dv()
cd(); // CHECK-HOST-NC: call i32 @cd()
#endif
h(); // CHECK-HOST: call i32 @_Z1hv()
ch(); // CHECK-HOST: call i32 @ch()
dh(); // CHECK-HOST: call i32 @_Z2dhv()
cdh(); // CHECK-HOST: call i32 @cdh()
g<<<0,0>>>(); // CHECK-HOST: call void @_Z1gv()
}
// CHECK-DEVICE-LABEL: define void @_Z7devicefv()
__device__ void devicef(void) {
fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp,
#if defined (NOCHECKS)
fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
fp_t chp = ch; // CHECK-DEVICE-NC: store {{.*}} @ch, {{.*}} %chp,
#endif
fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp,
fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp,
fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp,
fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,
d(); // CHECK-DEVICE: call i32 @_Z1dv()
cd(); // CHECK-DEVICE: call i32 @cd()
#if defined (NOCHECKS)
h(); // CHECK-DEVICE-NC: call i32 @_Z1hv()
ch(); // CHECK-DEVICE-NC: call i32 @ch()
#endif
dh(); // CHECK-DEVICE: call i32 @_Z2dhv()
cdh(); // CHECK-DEVICE: call i32 @cdh()
}
// CHECK-BOTH-LABEL: define void @_Z11hostdevicefv()
__host__ __device__ void hostdevicef(void) {
#if defined (NOCHECKS)
fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
fp_t cdp = cd; // CHECK-BOTH-NC: store {{.*}} @cd, {{.*}} %cdp,
fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
fp_t chp = ch; // CHECK-BOTH-NC: store {{.*}} @ch, {{.*}} %chp,
#endif
fp_t dhp = dh; // CHECK-BOTH: store {{.*}} @_Z2dhv, {{.*}} %dhp,
fp_t cdhp = cdh; // CHECK-BOTH: store {{.*}} @cdh, {{.*}} %cdhp,
fp_t hdp = hd; // CHECK-BOTH: store {{.*}} @_Z2hdv, {{.*}} %hdp,
fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp,
#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
gp_t gp = g; // CHECK-HOST-NC: store {{.*}} @_Z1gv, {{.*}} %gp,
#endif
#if defined (NOCHECKS)
d(); // CHECK-BOTH-NC: call i32 @_Z1dv()
cd(); // CHECK-BOTH-NC: call i32 @cd()
h(); // CHECK-BOTH-NC: call i32 @_Z1hv()
ch(); // CHECK-BOTH-NC: call i32 @ch()
#endif
dh(); // CHECK-BOTH: call i32 @_Z2dhv()
cdh(); // CHECK-BOTH: call i32 @cdh()
#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
g<<<0,0>>>(); // CHECK-HOST-NC: call void @_Z1gv()
#endif
}
// Test for address of overloaded function resolution in the global context.
fp_t hp = h;
fp_t chp = ch;
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g;
int x;
// Check constructors/destructors for D/H functions
struct s_cd_dh {
__host__ s_cd_dh() { x = 11; }
__device__ s_cd_dh() { x = 12; }
__host__ ~s_cd_dh() { x = 21; }
__device__ ~s_cd_dh() { x = 22; }
};
struct s_cd_hd {
__host__ __device__ s_cd_hd() { x = 31; }
__host__ __device__ ~s_cd_hd() { x = 32; }
};
// CHECK-BOTH: define void @_Z7wrapperv
#if defined(__CUDA_ARCH__)
__device__
#else
__host__
#endif
void wrapper() {
s_cd_dh scddh;
// CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev(
s_cd_hd scdhd;
// CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev
// CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev(
// CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev(
}
// CHECK-BOTH: ret void
// Now it's time to check what's been generated for the methods we used.
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev(
// CHECK-HOST: store i32 11,
// CHECK-DEVICE: store i32 12,
// CHECK-BOTH: ret void
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev(
// CHECK-BOTH: store i32 31,
// CHECK-BOTH: ret void
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
// CHECK-BOTH: store i32 32,
// CHECK-BOTH: ret void
// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev(
// CHECK-HOST: store i32 21,
// CHECK-DEVICE: store i32 22,
// CHECK-BOTH: ret void

View File

@ -0,0 +1,317 @@
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// Make sure we handle target overloads correctly.
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
// RUN: -fsyntax-only -fcuda-target-overloads -verify %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
// RUN: -fsyntax-only -fcuda-target-overloads -fcuda-is-device -verify %s
// Check target overloads handling with disabled call target checks.
// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -fsyntax-only \
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -verify %s
// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -fsyntax-only \
// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \
// RUN: -fcuda-is-device -verify %s
#include "Inputs/cuda.h"
typedef int (*fp_t)(void);
typedef void (*gp_t)(void);
// Host and unattributed functions can't be overloaded
__host__ int hh(void) { return 1; } // expected-note {{previous definition is here}}
int hh(void) { return 1; } // expected-error {{redefinition of 'hh'}}
// H/D overloading is OK
__host__ int dh(void) { return 2; }
__device__ int dh(void) { return 2; }
// H/HD and D/HD are not allowed
__host__ __device__ int hdh(void) { return 5; } // expected-note {{previous definition is here}}
__host__ int hdh(void) { return 4; } // expected-error {{redefinition of 'hdh'}}
__host__ int hhd(void) { return 4; } // expected-note {{previous definition is here}}
__host__ __device__ int hhd(void) { return 5; } // expected-error {{redefinition of 'hhd'}}
// expected-warning@-1 {{attribute declaration must precede definition}}
// expected-note@-3 {{previous definition is here}}
__host__ __device__ int hdd(void) { return 7; } // expected-note {{previous definition is here}}
__device__ int hdd(void) { return 6; } // expected-error {{redefinition of 'hdd'}}
__device__ int dhd(void) { return 6; } // expected-note {{previous definition is here}}
__host__ __device__ int dhd(void) { return 7; } // expected-error {{redefinition of 'dhd'}}
// expected-warning@-1 {{attribute declaration must precede definition}}
// expected-note@-3 {{previous definition is here}}
// Same tests for extern "C" functions
extern "C" __host__ int chh(void) {return 11;} // expected-note {{previous definition is here}}
extern "C" int chh(void) {return 11;} // expected-error {{redefinition of 'chh'}}
// H/D overloading is OK
extern "C" __device__ int cdh(void) {return 10;}
extern "C" __host__ int cdh(void) {return 11;}
// H/HD and D/HD overloading is not allowed.
extern "C" __host__ __device__ int chhd1(void) {return 12;} // expected-note {{previous definition is here}}
extern "C" __host__ int chhd1(void) {return 13;} // expected-error {{redefinition of 'chhd1'}}
extern "C" __host__ int chhd2(void) {return 13;} // expected-note {{previous definition is here}}
extern "C" __host__ __device__ int chhd2(void) {return 12;} // expected-error {{redefinition of 'chhd2'}}
// expected-warning@-1 {{attribute declaration must precede definition}}
// expected-note@-3 {{previous definition is here}}
// Helper functions to verify calling restrictions.
__device__ int d(void) { return 8; }
__host__ int h(void) { return 9; }
__global__ void g(void) {}
extern "C" __device__ int cd(void) {return 10;}
extern "C" __host__ int ch(void) {return 11;}
__host__ void hostf(void) {
fp_t dp = d;
fp_t cdp = cd;
#if !defined(NOCHECKS)
// expected-error@-3 {{reference to __device__ function 'd' in __host__ function}}
// expected-note@65 {{'d' declared here}}
// expected-error@-4 {{reference to __device__ function 'cd' in __host__ function}}
// expected-note@68 {{'cd' declared here}}
#endif
fp_t hp = h;
fp_t chp = ch;
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g;
d();
cd();
#if !defined(NOCHECKS)
// expected-error@-3 {{no matching function for call to 'd'}}
// expected-note@65 {{candidate function not viable: call to __device__ function from __host__ function}}
// expected-error@-4 {{no matching function for call to 'cd'}}
// expected-note@68 {{candidate function not viable: call to __device__ function from __host__ function}}
#endif
h();
ch();
dh();
cdh();
g(); // expected-error {{call to global function g not configured}}
g<<<0,0>>>();
}
__device__ void devicef(void) {
fp_t dp = d;
fp_t cdp = cd;
fp_t hp = h;
fp_t chp = ch;
#if !defined(NOCHECKS)
// expected-error@-3 {{reference to __host__ function 'h' in __device__ function}}
// expected-note@66 {{'h' declared here}}
// expected-error@-4 {{reference to __host__ function 'ch' in __device__ function}}
// expected-note@69 {{'ch' declared here}}
#endif
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}}
// expected-note@67 {{'g' declared here}}
d();
cd();
h();
ch();
#if !defined(NOCHECKS)
// expected-error@-3 {{no matching function for call to 'h'}}
// expected-note@66 {{candidate function not viable: call to __host__ function from __device__ function}}
// expected-error@-4 {{no matching function for call to 'ch'}}
// expected-note@69 {{candidate function not viable: call to __host__ function from __device__ function}}
#endif
dh();
cdh();
g(); // expected-error {{no matching function for call to 'g'}}
// expected-note@67 {{candidate function not viable: call to __global__ function from __device__ function}}
g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __device__ function}}
// expected-note@67 {{'g' declared here}}
}
__global__ void globalf(void) {
fp_t dp = d;
fp_t cdp = cd;
fp_t hp = h;
fp_t chp = ch;
#if !defined(NOCHECKS)
// expected-error@-3 {{reference to __host__ function 'h' in __global__ function}}
// expected-note@66 {{'h' declared here}}
// expected-error@-4 {{reference to __host__ function 'ch' in __global__ function}}
// expected-note@69 {{'ch' declared here}}
#endif
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}}
// expected-note@67 {{'g' declared here}}
d();
cd();
h();
ch();
#if !defined(NOCHECKS)
// expected-error@-3 {{no matching function for call to 'h'}}
// expected-note@66 {{candidate function not viable: call to __host__ function from __global__ function}}
// expected-error@-4 {{no matching function for call to 'ch'}}
// expected-note@69 {{candidate function not viable: call to __host__ function from __global__ function}}
#endif
dh();
cdh();
g(); // expected-error {{no matching function for call to 'g'}}
// expected-note@67 {{candidate function not viable: call to __global__ function from __global__ function}}
g<<<0,0>>>(); // expected-error {{reference to __global__ function 'g' in __global__ function}}
// expected-note@67 {{'g' declared here}}
}
__host__ __device__ void hostdevicef(void) {
fp_t dp = d;
fp_t cdp = cd;
fp_t hp = h;
fp_t chp = ch;
#if !defined(NOCHECKS)
#if !defined(__CUDA_ARCH__)
// expected-error@-6 {{reference to __device__ function 'd' in __host__ __device__ function}}
// expected-note@65 {{'d' declared here}}
// expected-error@-7 {{reference to __device__ function 'cd' in __host__ __device__ function}}
// expected-note@68 {{'cd' declared here}}
#else
// expected-error@-9 {{reference to __host__ function 'h' in __host__ __device__ function}}
// expected-note@66 {{'h' declared here}}
// expected-error@-10 {{reference to __host__ function 'ch' in __host__ __device__ function}}
// expected-note@69 {{'ch' declared here}}
#endif
#endif
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g;
#if defined(__CUDA_ARCH__)
// expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
// expected-note@67 {{'g' declared here}}
#endif
d();
cd();
h();
ch();
#if !defined(NOCHECKS)
#if !defined(__CUDA_ARCH__)
// expected-error@-6 {{no matching function for call to 'd'}}
// expected-note@65 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
// expected-error@-7 {{no matching function for call to 'cd'}}
// expected-note@68 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
#else
// expected-error@-9 {{no matching function for call to 'h'}}
// expected-note@66 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
// expected-error@-10 {{no matching function for call to 'ch'}}
// expected-note@69 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
#endif
#endif
dh();
cdh();
g();
g<<<0,0>>>();
#if !defined(__CUDA_ARCH__)
// expected-error@-3 {{call to global function g not configured}}
#else
// expected-error@-5 {{no matching function for call to 'g'}}
// expected-note@67 {{candidate function not viable: call to __global__ function from __host__ __device__ function}}
// expected-error@-6 {{reference to __global__ function 'g' in __host__ __device__ function}}
// expected-note@67 {{'g' declared here}}
#endif // __CUDA_ARCH__
}
// Test for address of overloaded function resolution in the global context.
fp_t hp = h;
fp_t chp = ch;
fp_t dhp = dh;
fp_t cdhp = cdh;
gp_t gp = g;
// Test overloading of destructors
// Can't mix H and unattributed destructors
struct d_h {
~d_h() {} // expected-note {{previous declaration is here}}
__host__ ~d_h() {} // expected-error {{destructor cannot be redeclared}}
};
// H/D overloading is OK
struct d_dh {
__device__ ~d_dh() {}
__host__ ~d_dh() {}
};
// HD is OK
struct d_hd {
__host__ __device__ ~d_hd() {}
};
// Mixing H/D and HD is not allowed.
struct d_dhhd {
__device__ ~d_dhhd() {}
__host__ ~d_dhhd() {} // expected-note {{previous declaration is here}}
__host__ __device__ ~d_dhhd() {} // expected-error {{destructor cannot be redeclared}}
};
struct d_hhd {
__host__ ~d_hhd() {} // expected-note {{previous declaration is here}}
__host__ __device__ ~d_hhd() {} // expected-error {{destructor cannot be redeclared}}
};
struct d_hdh {
__host__ __device__ ~d_hdh() {} // expected-note {{previous declaration is here}}
__host__ ~d_hdh() {} // expected-error {{destructor cannot be redeclared}}
};
struct d_dhd {
__device__ ~d_dhd() {} // expected-note {{previous declaration is here}}
__host__ __device__ ~d_dhd() {} // expected-error {{destructor cannot be redeclared}}
};
struct d_hdd {
__host__ __device__ ~d_hdd() {} // expected-note {{previous declaration is here}}
__device__ ~d_hdd() {} // expected-error {{destructor cannot be redeclared}}
};
// Test overloading of member functions
struct m_h {
void operator delete(void *ptr); // expected-note {{previous declaration is here}}
__host__ void operator delete(void *ptr); // expected-error {{class member cannot be redeclared}}
};
// D/H overloading is OK
struct m_dh {
__device__ void operator delete(void *ptr);
__host__ void operator delete(void *ptr);
};
// HD by itself is OK
struct m_hd {
__device__ __host__ void operator delete(void *ptr);
};
struct m_hhd {
__host__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
__host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
};
struct m_hdh {
__host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
__host__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
};
struct m_dhd {
__device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
__host__ __device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
};
struct m_hdd {
__host__ __device__ void operator delete(void *ptr) {} // expected-note {{previous declaration is here}}
__device__ void operator delete(void *ptr) {} // expected-error {{class member cannot be redeclared}}
};