[CUDA] pass debug options to ptxas.
ptxas optimizations are disabled if we need to generate debug info as ptxas does not accept '-g' otherwise. Differential Revision: http://reviews.llvm.org/D17111 llvm-svn: 261018
This commit is contained in:
parent
25628d3362
commit
0a0e54c194
|
@ -378,6 +378,8 @@ def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">,
|
|||
Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">;
|
||||
def cuda_host_only : Flag<["--"], "cuda-host-only">,
|
||||
HelpText<"Do host-side CUDA compilation only">;
|
||||
def cuda_noopt_device_debug : Flag<["--"], "cuda-noopt-device-debug">,
|
||||
HelpText<"Enable device-side debug info generation. Disables ptxas optimizations.">;
|
||||
def cuda_path_EQ : Joined<["--"], "cuda-path=">, Group<i_Group>,
|
||||
HelpText<"CUDA installation path">;
|
||||
def dA : Flag<["-"], "dA">, Group<d_Group>;
|
||||
|
|
|
@ -10691,15 +10691,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
assert(gpu_archs.size() == 1 && "Exactly one GPU Arch required for ptxas.");
|
||||
const std::string& gpu_arch = gpu_archs[0];
|
||||
|
||||
|
||||
ArgStringList CmdArgs;
|
||||
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
|
||||
if (Args.getLastArg(options::OPT_cuda_noopt_device_debug)) {
|
||||
// ptxas does not accept -g option if optimization is enabled, so
|
||||
// we ignore the compiler's -O* options if we want debug info.
|
||||
CmdArgs.push_back("-g");
|
||||
CmdArgs.push_back("--dont-merge-basicblocks");
|
||||
CmdArgs.push_back("--return-at-end");
|
||||
} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
|
||||
// Map the -O we received to -O{0,1,2,3}.
|
||||
//
|
||||
// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
|
||||
// default, so it may correspond more closely to the spirit of clang -O2.
|
||||
|
||||
// Map the -O we received to -O{0,1,2,3}.
|
||||
//
|
||||
// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's default,
|
||||
// so it may correspond more closely to the spirit of clang -O2.
|
||||
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
|
||||
// -O3 seems like the least-bad option when -Osomething is specified to
|
||||
// clang but it isn't handled below.
|
||||
StringRef OOpt = "3";
|
||||
|
@ -10725,9 +10730,6 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
CmdArgs.push_back("-O0");
|
||||
}
|
||||
|
||||
// Don't bother passing -g to ptxas: It's enabled by default at -O0, and
|
||||
// not supported at other optimization levels.
|
||||
|
||||
CmdArgs.push_back("--gpu-name");
|
||||
CmdArgs.push_back(Args.MakeArgString(gpu_arch));
|
||||
CmdArgs.push_back("--output-file");
|
||||
|
|
|
@ -18,6 +18,10 @@
|
|||
// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
|
||||
|
||||
// With debugging enabled, ptxas should be run with no ptxas optimizations.
|
||||
// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s
|
||||
|
||||
// Regular compile without -O. This should result in us passing -O0 to ptxas.
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
|
||||
|
@ -59,9 +63,14 @@
|
|||
// ARCH64: "-m64"
|
||||
// ARCH32: "-m32"
|
||||
// OPT0: "-O0"
|
||||
// OPT0-NOT: "-g"
|
||||
// OPT1: "-O1"
|
||||
// OPT1-NOT: "-g"
|
||||
// OPT2: "-O2"
|
||||
// OPT2-NOT: "-g"
|
||||
// OPT3: "-O3"
|
||||
// OPT3-NOT: "-g"
|
||||
// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end"
|
||||
// SM20: "--gpu-name" "sm_20"
|
||||
// SM35: "--gpu-name" "sm_35"
|
||||
// SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
|
||||
|
|
Loading…
Reference in New Issue