From 2d5de7e568f8a152086d758b50aaa66a9a6a0721 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Thu, 7 Jul 2016 16:41:08 +0000 Subject: [PATCH] NVPTX: Use the nvvm builtins to read SRegs rather than the legacy ptx ones The ptx spellings were removed from LLVM in r274769. llvm-svn: 274770 --- clang/include/clang/Basic/BuiltinsNVPTX.def | 73 +++++------ clang/lib/Headers/cuda_builtin_vars.h | 26 ++-- clang/test/CodeGen/builtins-nvptx.c | 136 ++++++++++---------- clang/test/CodeGenCUDA/cuda-builtin-vars.cu | 24 ++-- clang/test/SemaCUDA/builtins.cu | 6 +- 5 files changed, 131 insertions(+), 134 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 48aa14bbfe10..acfed606d528 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -14,53 +14,50 @@ // The format of this database matches clang/Basic/Builtins.def. -// Builtins retained from previous PTX back-end -BUILTIN(__builtin_ptx_read_tid_x, "i", "nc") -BUILTIN(__builtin_ptx_read_tid_y, "i", "nc") -BUILTIN(__builtin_ptx_read_tid_z, "i", "nc") -BUILTIN(__builtin_ptx_read_tid_w, "i", "nc") +// Special Registers -BUILTIN(__builtin_ptx_read_ntid_x, "i", "nc") -BUILTIN(__builtin_ptx_read_ntid_y, "i", "nc") -BUILTIN(__builtin_ptx_read_ntid_z, "i", "nc") -BUILTIN(__builtin_ptx_read_ntid_w, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc") -BUILTIN(__builtin_ptx_read_ctaid_x, "i", "nc") -BUILTIN(__builtin_ptx_read_ctaid_y, "i", "nc") -BUILTIN(__builtin_ptx_read_ctaid_z, "i", "nc") -BUILTIN(__builtin_ptx_read_ctaid_w, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc") -BUILTIN(__builtin_ptx_read_nctaid_x, "i", "nc") -BUILTIN(__builtin_ptx_read_nctaid_y, "i", "nc") -BUILTIN(__builtin_ptx_read_nctaid_z, "i", "nc") -BUILTIN(__builtin_ptx_read_nctaid_w, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc") -BUILTIN(__builtin_ptx_read_laneid, "i", "nc") -BUILTIN(__builtin_ptx_read_warpid, "i", "nc") -BUILTIN(__builtin_ptx_read_nwarpid, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc") -BUILTIN(__builtin_ptx_read_smid, "i", "nc") -BUILTIN(__builtin_ptx_read_nsmid, "i", "nc") -BUILTIN(__builtin_ptx_read_gridid, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc") -BUILTIN(__builtin_ptx_read_lanemask_eq, "i", "nc") -BUILTIN(__builtin_ptx_read_lanemask_le, "i", "nc") -BUILTIN(__builtin_ptx_read_lanemask_lt, "i", "nc") -BUILTIN(__builtin_ptx_read_lanemask_ge, "i", "nc") -BUILTIN(__builtin_ptx_read_lanemask_gt, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc") -BUILTIN(__builtin_ptx_read_clock, "i", "n") -BUILTIN(__builtin_ptx_read_clock64, "LLi", "n") +BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc") +BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc") -BUILTIN(__builtin_ptx_read_pm0, "i", "n") -BUILTIN(__builtin_ptx_read_pm1, "i", "n") -BUILTIN(__builtin_ptx_read_pm2, "i", "n") -BUILTIN(__builtin_ptx_read_pm3, "i", "n") +BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n") +BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n") -BUILTIN(__builtin_ptx_bar_sync, "vi", "n") +BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n") +BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n") +BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n") +BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") - -// Builtins exposed as part of NVVM // MISC BUILTIN(__nvvm_clz_i, "ii", "") @@ -396,11 +393,11 @@ BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") // Sync -BUILTIN(__syncthreads, "v", "") BUILTIN(__nvvm_bar0, "v", "") BUILTIN(__nvvm_bar0_popc, "ii", "") BUILTIN(__nvvm_bar0_and, "ii", "") BUILTIN(__nvvm_bar0_or, "ii", "") +BUILTIN(__nvvm_bar_sync, "vi", "n") // Shuffle diff --git a/clang/lib/Headers/cuda_builtin_vars.h b/clang/lib/Headers/cuda_builtin_vars.h index ec8308640360..6f5eb9c78d85 100644 --- a/clang/lib/Headers/cuda_builtin_vars.h +++ b/clang/lib/Headers/cuda_builtin_vars.h @@ -37,7 +37,7 @@ struct dim3; // Example: // int x = threadIdx.x; // IR output: -// %0 = call i32 @llvm.ptx.read.tid.x() #3 +// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 // PTX output: // mov.u32 %r2, %tid.x; @@ -64,9 +64,9 @@ struct dim3; __attribute__((device)) TypeName *operator&() const __DELETE struct __cuda_builtin_threadIdx_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator uint3() const; @@ -75,9 +75,9 @@ private: }; struct __cuda_builtin_blockIdx_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator uint3() const; @@ -86,9 +86,9 @@ private: }; struct __cuda_builtin_blockDim_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; @@ -97,9 +97,9 @@ private: }; struct __cuda_builtin_gridDim_t { - __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x()); - __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y()); - __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z()); + __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); + __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); + __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 4607f3be2be0..2cb0ff3e37ff 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -11,15 +11,15 @@ __device__ int read_tid() { -// CHECK: call i32 @llvm.ptx.read.tid.x() -// CHECK: call i32 @llvm.ptx.read.tid.y() -// CHECK: call i32 @llvm.ptx.read.tid.z() -// CHECK: call i32 @llvm.ptx.read.tid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w() - int x = __builtin_ptx_read_tid_x(); - int y = __builtin_ptx_read_tid_y(); - int z = __builtin_ptx_read_tid_z(); - int w = __builtin_ptx_read_tid_w(); + int x = __nvvm_read_ptx_sreg_tid_x(); + int y = __nvvm_read_ptx_sreg_tid_y(); + int z = __nvvm_read_ptx_sreg_tid_z(); + int w = __nvvm_read_ptx_sreg_tid_w(); return x + y + z + w; @@ -27,15 +27,15 @@ __device__ int read_tid() { __device__ int read_ntid() { -// CHECK: call i32 @llvm.ptx.read.ntid.x() -// CHECK: call i32 @llvm.ptx.read.ntid.y() -// CHECK: call i32 @llvm.ptx.read.ntid.z() -// CHECK: call i32 @llvm.ptx.read.ntid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w() - int x = __builtin_ptx_read_ntid_x(); - int y = __builtin_ptx_read_ntid_y(); - int z = __builtin_ptx_read_ntid_z(); - int w = __builtin_ptx_read_ntid_w(); + int x = __nvvm_read_ptx_sreg_ntid_x(); + int y = __nvvm_read_ptx_sreg_ntid_y(); + int z = __nvvm_read_ptx_sreg_ntid_z(); + int w = __nvvm_read_ptx_sreg_ntid_w(); return x + y + z + w; @@ -43,15 +43,15 @@ __device__ int read_ntid() { __device__ int read_ctaid() { -// CHECK: call i32 @llvm.ptx.read.ctaid.x() -// CHECK: call i32 @llvm.ptx.read.ctaid.y() -// CHECK: call i32 @llvm.ptx.read.ctaid.z() -// CHECK: call i32 @llvm.ptx.read.ctaid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w() - int x = __builtin_ptx_read_ctaid_x(); - int y = __builtin_ptx_read_ctaid_y(); - int z = __builtin_ptx_read_ctaid_z(); - int w = __builtin_ptx_read_ctaid_w(); + int x = __nvvm_read_ptx_sreg_ctaid_x(); + int y = __nvvm_read_ptx_sreg_ctaid_y(); + int z = __nvvm_read_ptx_sreg_ctaid_z(); + int w = __nvvm_read_ptx_sreg_ctaid_w(); return x + y + z + w; @@ -59,15 +59,15 @@ __device__ int read_ctaid() { __device__ int read_nctaid() { -// CHECK: call i32 @llvm.ptx.read.nctaid.x() -// CHECK: call i32 @llvm.ptx.read.nctaid.y() -// CHECK: call i32 @llvm.ptx.read.nctaid.z() -// CHECK: call i32 @llvm.ptx.read.nctaid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w() - int x = __builtin_ptx_read_nctaid_x(); - int y = __builtin_ptx_read_nctaid_y(); - int z = __builtin_ptx_read_nctaid_z(); - int w = __builtin_ptx_read_nctaid_w(); + int x = __nvvm_read_ptx_sreg_nctaid_x(); + int y = __nvvm_read_ptx_sreg_nctaid_y(); + int z = __nvvm_read_ptx_sreg_nctaid_z(); + int w = __nvvm_read_ptx_sreg_nctaid_w(); return x + y + z + w; @@ -75,19 +75,19 @@ __device__ int read_nctaid() { __device__ int read_ids() { -// CHECK: call i32 @llvm.ptx.read.laneid() -// CHECK: call i32 @llvm.ptx.read.warpid() -// CHECK: call i32 @llvm.ptx.read.nwarpid() -// CHECK: call i32 @llvm.ptx.read.smid() -// CHECK: call i32 @llvm.ptx.read.nsmid() -// CHECK: call i32 @llvm.ptx.read.gridid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid() - int a = __builtin_ptx_read_laneid(); - int b = __builtin_ptx_read_warpid(); - int c = __builtin_ptx_read_nwarpid(); - int d = __builtin_ptx_read_smid(); - int e = __builtin_ptx_read_nsmid(); - int f = __builtin_ptx_read_gridid(); + int a = __nvvm_read_ptx_sreg_laneid(); + int b = __nvvm_read_ptx_sreg_warpid(); + int c = __nvvm_read_ptx_sreg_nwarpid(); + int d = __nvvm_read_ptx_sreg_smid(); + int e = __nvvm_read_ptx_sreg_nsmid(); + int f = __nvvm_read_ptx_sreg_gridid(); return a + b + c + d + e + f; @@ -95,17 +95,17 @@ __device__ int read_ids() { __device__ int read_lanemasks() { -// CHECK: call i32 @llvm.ptx.read.lanemask.eq() -// CHECK: call i32 @llvm.ptx.read.lanemask.le() -// CHECK: call i32 @llvm.ptx.read.lanemask.lt() -// CHECK: call i32 @llvm.ptx.read.lanemask.ge() -// CHECK: call i32 @llvm.ptx.read.lanemask.gt() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt() - int a = __builtin_ptx_read_lanemask_eq(); - int b = __builtin_ptx_read_lanemask_le(); - int c = __builtin_ptx_read_lanemask_lt(); - int d = __builtin_ptx_read_lanemask_ge(); - int e = __builtin_ptx_read_lanemask_gt(); + int a = __nvvm_read_ptx_sreg_lanemask_eq(); + int b = __nvvm_read_ptx_sreg_lanemask_le(); + int c = __nvvm_read_ptx_sreg_lanemask_lt(); + int d = __nvvm_read_ptx_sreg_lanemask_ge(); + int e = __nvvm_read_ptx_sreg_lanemask_gt(); return a + b + c + d + e; @@ -113,26 +113,26 @@ __device__ int read_lanemasks() { __device__ long long read_clocks() { -// CHECK: call i32 @llvm.ptx.read.clock() -// CHECK: call i64 @llvm.ptx.read.clock64() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock() +// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64() - int a = __builtin_ptx_read_clock(); - long long b = __builtin_ptx_read_clock64(); + int a = __nvvm_read_ptx_sreg_clock(); + long long b = __nvvm_read_ptx_sreg_clock64(); return a + b; } __device__ int read_pms() { -// CHECK: call i32 @llvm.ptx.read.pm0() -// CHECK: call i32 @llvm.ptx.read.pm1() -// CHECK: call i32 @llvm.ptx.read.pm2() -// CHECK: call i32 @llvm.ptx.read.pm3() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3() - int a = __builtin_ptx_read_pm0(); - int b = __builtin_ptx_read_pm1(); - int c = __builtin_ptx_read_pm2(); - int d = __builtin_ptx_read_pm3(); + int a = __nvvm_read_ptx_sreg_pm0(); + int b = __nvvm_read_ptx_sreg_pm1(); + int c = __nvvm_read_ptx_sreg_pm2(); + int d = __nvvm_read_ptx_sreg_pm3(); return a + b + c + d; @@ -140,9 +140,9 @@ __device__ int read_pms() { __device__ void sync() { -// CHECK: call void @llvm.ptx.bar.sync(i32 0) +// CHECK: call void @llvm.nvvm.bar.sync(i32 0) - __builtin_ptx_bar_sync(0); + __nvvm_bar_sync(0); } diff --git a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu index 834e16d04d67..c2159f5af141 100644 --- a/clang/test/CodeGenCUDA/cuda-builtin-vars.cu +++ b/clang/test/CodeGenCUDA/cuda-builtin-vars.cu @@ -6,21 +6,21 @@ __attribute__((global)) void kernel(int *out) { int i = 0; - out[i++] = threadIdx.x; // CHECK: call i32 @llvm.ptx.read.tid.x() - out[i++] = threadIdx.y; // CHECK: call i32 @llvm.ptx.read.tid.y() - out[i++] = threadIdx.z; // CHECK: call i32 @llvm.ptx.read.tid.z() + out[i++] = threadIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + out[i++] = threadIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() + out[i++] = threadIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() - out[i++] = blockIdx.x; // CHECK: call i32 @llvm.ptx.read.ctaid.x() - out[i++] = blockIdx.y; // CHECK: call i32 @llvm.ptx.read.ctaid.y() - out[i++] = blockIdx.z; // CHECK: call i32 @llvm.ptx.read.ctaid.z() + out[i++] = blockIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() + out[i++] = blockIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() + out[i++] = blockIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() - out[i++] = blockDim.x; // CHECK: call i32 @llvm.ptx.read.ntid.x() - out[i++] = blockDim.y; // CHECK: call i32 @llvm.ptx.read.ntid.y() - out[i++] = blockDim.z; // CHECK: call i32 @llvm.ptx.read.ntid.z() + out[i++] = blockDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + out[i++] = blockDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() + out[i++] = blockDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() - out[i++] = gridDim.x; // CHECK: call i32 @llvm.ptx.read.nctaid.x() - out[i++] = gridDim.y; // CHECK: call i32 @llvm.ptx.read.nctaid.y() - out[i++] = gridDim.z; // CHECK: call i32 @llvm.ptx.read.nctaid.z() + out[i++] = gridDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() + out[i++] = gridDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() + out[i++] = gridDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() out[i++] = warpSize; // CHECK: store i32 32, diff --git a/clang/test/SemaCUDA/builtins.cu b/clang/test/SemaCUDA/builtins.cu index 7e6d014c3fe6..814fda2ac7d3 100644 --- a/clang/test/SemaCUDA/builtins.cu +++ b/clang/test/SemaCUDA/builtins.cu @@ -18,13 +18,13 @@ void hf() { int x = __builtin_ia32_rdtsc(); - int y = __builtin_ptx_read_tid_x(); // expected-note {{'__builtin_ptx_read_tid_x' declared here}} - // expected-error@-1 {{reference to __device__ function '__builtin_ptx_read_tid_x' in __host__ function}} + int y = __nvvm_read_ptx_sreg_tid_x(); // expected-note {{'__nvvm_read_ptx_sreg_tid_x' declared here}} + // expected-error@-1 {{reference to __device__ function '__nvvm_read_ptx_sreg_tid_x' in __host__ function}} x = __builtin_abs(1); } __attribute__((device)) void df() { - int x = __builtin_ptx_read_tid_x(); + int x = __nvvm_read_ptx_sreg_tid_x(); int y = __builtin_ia32_rdtsc(); // expected-error {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}} // expected-note@20 {{'__builtin_ia32_rdtsc' declared here}} x = __builtin_abs(1);