NVPTX: Use the nvvm builtins to read SRegs rather than the legacy ptx ones

The ptx spellings were removed from LLVM in r274769.

llvm-svn: 274770
This commit is contained in:
Justin Bogner 2016-07-07 16:41:08 +00:00
parent a466cc33fa
commit 2d5de7e568
5 changed files with 131 additions and 134 deletions

View File

@ -14,53 +14,50 @@
// The format of this database matches clang/Basic/Builtins.def.
// Builtins retained from previous PTX back-end
BUILTIN(__builtin_ptx_read_tid_x, "i", "nc")
BUILTIN(__builtin_ptx_read_tid_y, "i", "nc")
BUILTIN(__builtin_ptx_read_tid_z, "i", "nc")
BUILTIN(__builtin_ptx_read_tid_w, "i", "nc")
// Special Registers
BUILTIN(__builtin_ptx_read_ntid_x, "i", "nc")
BUILTIN(__builtin_ptx_read_ntid_y, "i", "nc")
BUILTIN(__builtin_ptx_read_ntid_z, "i", "nc")
BUILTIN(__builtin_ptx_read_ntid_w, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
BUILTIN(__builtin_ptx_read_ctaid_x, "i", "nc")
BUILTIN(__builtin_ptx_read_ctaid_y, "i", "nc")
BUILTIN(__builtin_ptx_read_ctaid_z, "i", "nc")
BUILTIN(__builtin_ptx_read_ctaid_w, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
BUILTIN(__builtin_ptx_read_nctaid_x, "i", "nc")
BUILTIN(__builtin_ptx_read_nctaid_y, "i", "nc")
BUILTIN(__builtin_ptx_read_nctaid_z, "i", "nc")
BUILTIN(__builtin_ptx_read_nctaid_w, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
BUILTIN(__builtin_ptx_read_laneid, "i", "nc")
BUILTIN(__builtin_ptx_read_warpid, "i", "nc")
BUILTIN(__builtin_ptx_read_nwarpid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
BUILTIN(__builtin_ptx_read_smid, "i", "nc")
BUILTIN(__builtin_ptx_read_nsmid, "i", "nc")
BUILTIN(__builtin_ptx_read_gridid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
BUILTIN(__builtin_ptx_read_lanemask_eq, "i", "nc")
BUILTIN(__builtin_ptx_read_lanemask_le, "i", "nc")
BUILTIN(__builtin_ptx_read_lanemask_lt, "i", "nc")
BUILTIN(__builtin_ptx_read_lanemask_ge, "i", "nc")
BUILTIN(__builtin_ptx_read_lanemask_gt, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
BUILTIN(__builtin_ptx_read_clock, "i", "n")
BUILTIN(__builtin_ptx_read_clock64, "LLi", "n")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
BUILTIN(__builtin_ptx_read_pm0, "i", "n")
BUILTIN(__builtin_ptx_read_pm1, "i", "n")
BUILTIN(__builtin_ptx_read_pm2, "i", "n")
BUILTIN(__builtin_ptx_read_pm3, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
BUILTIN(__builtin_ptx_bar_sync, "vi", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
// Builtins exposed as part of NVVM
// MISC
BUILTIN(__nvvm_clz_i, "ii", "")
@ -396,11 +393,11 @@ BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
// Sync
BUILTIN(__syncthreads, "v", "")
BUILTIN(__nvvm_bar0, "v", "")
BUILTIN(__nvvm_bar0_popc, "ii", "")
BUILTIN(__nvvm_bar0_and, "ii", "")
BUILTIN(__nvvm_bar0_or, "ii", "")
BUILTIN(__nvvm_bar_sync, "vi", "n")
// Shuffle

View File

@ -37,7 +37,7 @@ struct dim3;
// Example:
// int x = threadIdx.x;
// IR output:
// %0 = call i32 @llvm.ptx.read.tid.x() #3
// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
// PTX output:
// mov.u32 %r2, %tid.x;
@ -64,9 +64,9 @@ struct dim3;
__attribute__((device)) TypeName *operator&() const __DELETE
struct __cuda_builtin_threadIdx_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
// threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator uint3() const;
@ -75,9 +75,9 @@ private:
};
struct __cuda_builtin_blockIdx_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
// blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator uint3() const;
@ -86,9 +86,9 @@ private:
};
struct __cuda_builtin_blockDim_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
// blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
@ -97,9 +97,9 @@ private:
};
struct __cuda_builtin_gridDim_t {
__CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
__CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
__CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
// gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;

View File

@ -11,15 +11,15 @@
__device__ int read_tid() {
// CHECK: call i32 @llvm.ptx.read.tid.x()
// CHECK: call i32 @llvm.ptx.read.tid.y()
// CHECK: call i32 @llvm.ptx.read.tid.z()
// CHECK: call i32 @llvm.ptx.read.tid.w()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
int x = __builtin_ptx_read_tid_x();
int y = __builtin_ptx_read_tid_y();
int z = __builtin_ptx_read_tid_z();
int w = __builtin_ptx_read_tid_w();
int x = __nvvm_read_ptx_sreg_tid_x();
int y = __nvvm_read_ptx_sreg_tid_y();
int z = __nvvm_read_ptx_sreg_tid_z();
int w = __nvvm_read_ptx_sreg_tid_w();
return x + y + z + w;
@ -27,15 +27,15 @@ __device__ int read_tid() {
__device__ int read_ntid() {
// CHECK: call i32 @llvm.ptx.read.ntid.x()
// CHECK: call i32 @llvm.ptx.read.ntid.y()
// CHECK: call i32 @llvm.ptx.read.ntid.z()
// CHECK: call i32 @llvm.ptx.read.ntid.w()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
int x = __builtin_ptx_read_ntid_x();
int y = __builtin_ptx_read_ntid_y();
int z = __builtin_ptx_read_ntid_z();
int w = __builtin_ptx_read_ntid_w();
int x = __nvvm_read_ptx_sreg_ntid_x();
int y = __nvvm_read_ptx_sreg_ntid_y();
int z = __nvvm_read_ptx_sreg_ntid_z();
int w = __nvvm_read_ptx_sreg_ntid_w();
return x + y + z + w;
@ -43,15 +43,15 @@ __device__ int read_ntid() {
__device__ int read_ctaid() {
// CHECK: call i32 @llvm.ptx.read.ctaid.x()
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
// CHECK: call i32 @llvm.ptx.read.ctaid.z()
// CHECK: call i32 @llvm.ptx.read.ctaid.w()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
int x = __builtin_ptx_read_ctaid_x();
int y = __builtin_ptx_read_ctaid_y();
int z = __builtin_ptx_read_ctaid_z();
int w = __builtin_ptx_read_ctaid_w();
int x = __nvvm_read_ptx_sreg_ctaid_x();
int y = __nvvm_read_ptx_sreg_ctaid_y();
int z = __nvvm_read_ptx_sreg_ctaid_z();
int w = __nvvm_read_ptx_sreg_ctaid_w();
return x + y + z + w;
@ -59,15 +59,15 @@ __device__ int read_ctaid() {
__device__ int read_nctaid() {
// CHECK: call i32 @llvm.ptx.read.nctaid.x()
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
// CHECK: call i32 @llvm.ptx.read.nctaid.z()
// CHECK: call i32 @llvm.ptx.read.nctaid.w()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
int x = __builtin_ptx_read_nctaid_x();
int y = __builtin_ptx_read_nctaid_y();
int z = __builtin_ptx_read_nctaid_z();
int w = __builtin_ptx_read_nctaid_w();
int x = __nvvm_read_ptx_sreg_nctaid_x();
int y = __nvvm_read_ptx_sreg_nctaid_y();
int z = __nvvm_read_ptx_sreg_nctaid_z();
int w = __nvvm_read_ptx_sreg_nctaid_w();
return x + y + z + w;
@ -75,19 +75,19 @@ __device__ int read_nctaid() {
__device__ int read_ids() {
// CHECK: call i32 @llvm.ptx.read.laneid()
// CHECK: call i32 @llvm.ptx.read.warpid()
// CHECK: call i32 @llvm.ptx.read.nwarpid()
// CHECK: call i32 @llvm.ptx.read.smid()
// CHECK: call i32 @llvm.ptx.read.nsmid()
// CHECK: call i32 @llvm.ptx.read.gridid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid()
int a = __builtin_ptx_read_laneid();
int b = __builtin_ptx_read_warpid();
int c = __builtin_ptx_read_nwarpid();
int d = __builtin_ptx_read_smid();
int e = __builtin_ptx_read_nsmid();
int f = __builtin_ptx_read_gridid();
int a = __nvvm_read_ptx_sreg_laneid();
int b = __nvvm_read_ptx_sreg_warpid();
int c = __nvvm_read_ptx_sreg_nwarpid();
int d = __nvvm_read_ptx_sreg_smid();
int e = __nvvm_read_ptx_sreg_nsmid();
int f = __nvvm_read_ptx_sreg_gridid();
return a + b + c + d + e + f;
@ -95,17 +95,17 @@ __device__ int read_ids() {
__device__ int read_lanemasks() {
// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
// CHECK: call i32 @llvm.ptx.read.lanemask.gt()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
int a = __builtin_ptx_read_lanemask_eq();
int b = __builtin_ptx_read_lanemask_le();
int c = __builtin_ptx_read_lanemask_lt();
int d = __builtin_ptx_read_lanemask_ge();
int e = __builtin_ptx_read_lanemask_gt();
int a = __nvvm_read_ptx_sreg_lanemask_eq();
int b = __nvvm_read_ptx_sreg_lanemask_le();
int c = __nvvm_read_ptx_sreg_lanemask_lt();
int d = __nvvm_read_ptx_sreg_lanemask_ge();
int e = __nvvm_read_ptx_sreg_lanemask_gt();
return a + b + c + d + e;
@ -113,26 +113,26 @@ __device__ int read_lanemasks() {
__device__ long long read_clocks() {
// CHECK: call i32 @llvm.ptx.read.clock()
// CHECK: call i64 @llvm.ptx.read.clock64()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
int a = __builtin_ptx_read_clock();
long long b = __builtin_ptx_read_clock64();
int a = __nvvm_read_ptx_sreg_clock();
long long b = __nvvm_read_ptx_sreg_clock64();
return a + b;
}
__device__ int read_pms() {
// CHECK: call i32 @llvm.ptx.read.pm0()
// CHECK: call i32 @llvm.ptx.read.pm1()
// CHECK: call i32 @llvm.ptx.read.pm2()
// CHECK: call i32 @llvm.ptx.read.pm3()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3()
int a = __builtin_ptx_read_pm0();
int b = __builtin_ptx_read_pm1();
int c = __builtin_ptx_read_pm2();
int d = __builtin_ptx_read_pm3();
int a = __nvvm_read_ptx_sreg_pm0();
int b = __nvvm_read_ptx_sreg_pm1();
int c = __nvvm_read_ptx_sreg_pm2();
int d = __nvvm_read_ptx_sreg_pm3();
return a + b + c + d;
@ -140,9 +140,9 @@ __device__ int read_pms() {
__device__ void sync() {
// CHECK: call void @llvm.ptx.bar.sync(i32 0)
// CHECK: call void @llvm.nvvm.bar.sync(i32 0)
__builtin_ptx_bar_sync(0);
__nvvm_bar_sync(0);
}

View File

@ -6,21 +6,21 @@
__attribute__((global))
void kernel(int *out) {
int i = 0;
out[i++] = threadIdx.x; // CHECK: call i32 @llvm.ptx.read.tid.x()
out[i++] = threadIdx.y; // CHECK: call i32 @llvm.ptx.read.tid.y()
out[i++] = threadIdx.z; // CHECK: call i32 @llvm.ptx.read.tid.z()
out[i++] = threadIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
out[i++] = threadIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
out[i++] = threadIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
out[i++] = blockIdx.x; // CHECK: call i32 @llvm.ptx.read.ctaid.x()
out[i++] = blockIdx.y; // CHECK: call i32 @llvm.ptx.read.ctaid.y()
out[i++] = blockIdx.z; // CHECK: call i32 @llvm.ptx.read.ctaid.z()
out[i++] = blockIdx.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
out[i++] = blockIdx.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
out[i++] = blockIdx.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
out[i++] = blockDim.x; // CHECK: call i32 @llvm.ptx.read.ntid.x()
out[i++] = blockDim.y; // CHECK: call i32 @llvm.ptx.read.ntid.y()
out[i++] = blockDim.z; // CHECK: call i32 @llvm.ptx.read.ntid.z()
out[i++] = blockDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
out[i++] = blockDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
out[i++] = blockDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
out[i++] = gridDim.x; // CHECK: call i32 @llvm.ptx.read.nctaid.x()
out[i++] = gridDim.y; // CHECK: call i32 @llvm.ptx.read.nctaid.y()
out[i++] = gridDim.z; // CHECK: call i32 @llvm.ptx.read.nctaid.z()
out[i++] = gridDim.x; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
out[i++] = gridDim.y; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
out[i++] = gridDim.z; // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
out[i++] = warpSize; // CHECK: store i32 32,

View File

@ -18,13 +18,13 @@
void hf() {
int x = __builtin_ia32_rdtsc();
int y = __builtin_ptx_read_tid_x(); // expected-note {{'__builtin_ptx_read_tid_x' declared here}}
// expected-error@-1 {{reference to __device__ function '__builtin_ptx_read_tid_x' in __host__ function}}
int y = __nvvm_read_ptx_sreg_tid_x(); // expected-note {{'__nvvm_read_ptx_sreg_tid_x' declared here}}
// expected-error@-1 {{reference to __device__ function '__nvvm_read_ptx_sreg_tid_x' in __host__ function}}
x = __builtin_abs(1);
}
__attribute__((device)) void df() {
int x = __builtin_ptx_read_tid_x();
int x = __nvvm_read_ptx_sreg_tid_x();
int y = __builtin_ia32_rdtsc(); // expected-error {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}}
// expected-note@20 {{'__builtin_ia32_rdtsc' declared here}}
x = __builtin_abs(1);