From aa24601f9890c5cbc9b33caf8a1d058776ece0af Mon Sep 17 00:00:00 2001
From: Yaxun Liu <Yaxun.Liu@amd.com>
Date: Tue, 12 Jun 2018 23:58:59 +0000
Subject: [PATCH] [CUDA][HIP] Allow CUDA __global__ functions to have amdgpu
 kernel attributes

There are HIP applications e.g. Tensorflow 1.3 using amdgpu kernel attributes, however
currently they are only allowed on OpenCL kernel functions.

This patch will allow amdgpu kernel attributes to be applied to CUDA/HIP __global__
functions.

Differential Revision: https://reviews.llvm.org/D47958

llvm-svn: 334561
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +-
 clang/lib/Sema/SemaDeclAttr.cpp               | 34 +++++-----
 clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu | 37 +++++++++++
 clang/test/SemaCUDA/amdgpu-attrs.cu           | 62 +++++--------------
 clang/test/SemaOpenCL/invalid-kernel-attrs.cl |  8 +--
 5 files changed, 76 insertions(+), 67 deletions(-)
 create mode 100644 clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f0ea769f910f..8fb30ff4e459 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8435,7 +8435,7 @@ def err_reference_pipe_type : Error <
   "pipes packet types cannot be of reference type">;
 def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 'main'">;
 def err_opencl_kernel_attr :
-  Error<"attribute %0 can only be applied to a kernel function">;
+  Error<"attribute %0 can only be applied to an OpenCL kernel function">;
 def err_opencl_return_value_with_address_space : Error<
   "return value cannot be qualified with address space">;
 def err_opencl_constant_no_init : Error<
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index e4532a7e678a..779192b8650b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6468,25 +6468,27 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
     } else if (const auto *A = D->getAttr<VecTypeHintAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
     } else if (const auto *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
+    } else if (!D->hasAttr<CUDAGlobalAttr>()) {
+      if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      }
     }
   }
 }
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
new file mode 100644
index 000000000000..70eb9091d8d4
--- /dev/null
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx \
+// RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
+// RUN:     -check-prefix=NAMD
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:     -verify -o - %s | FileCheck -check-prefix=NAMD %s
+
+#include "Inputs/cuda.h"
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
+__global__ void flat_work_group_size_32_64() {
+// CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+__global__ void waves_per_eu_2() {
+// CHECK: define amdgpu_kernel void @_Z14waves_per_eu_2v() [[WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
+__global__ void num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @_Z11num_sgpr_32v() [[NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
+__global__ void num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]
+}
+
+// Make sure this is silently accepted on other targets.
+// NAMD-NOT: "amdgpu-flat-work-group-size"
+// NAMD-NOT: "amdgpu-waves-per-eu"
+// NAMD-NOT: "amdgpu-num-vgpr"
+// NAMD-NOT: "amdgpu-num-sgpr"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" 
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" 
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" 
diff --git a/clang/test/SemaCUDA/amdgpu-attrs.cu b/clang/test/SemaCUDA/amdgpu-attrs.cu
index 87559726869b..63abda9eea37 100644
--- a/clang/test/SemaCUDA/amdgpu-attrs.cu
+++ b/clang/test/SemaCUDA/amdgpu-attrs.cu
@@ -1,110 +1,80 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
-
 #include "Inputs/cuda.h"
 
 
-// expected-error@+2 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64)))
 __global__ void flat_work_group_size_32_64() {}
 
-// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2)))
 __global__ void waves_per_eu_2() {}
 
-// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2, 4)))
 __global__ void waves_per_eu_2_4() {}
 
-// expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_sgpr(32)))
 __global__ void num_sgpr_32() {}
 
-// expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_vgpr(64)))
 __global__ void num_vgpr_64() {}
 
 
-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2() {}
 
-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4() {}
 
-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32)))
 __global__ void flat_work_group_size_32_64_num_sgpr_32() {}
 
-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_num_vgpr_64() {}
 
-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
 __global__ void waves_per_eu_2_num_sgpr_32() {}
 
-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
 __global__ void waves_per_eu_2_num_vgpr_64() {}
 
-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
 __global__ void waves_per_eu_2_4_num_sgpr_32() {}
 
-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
 __global__ void waves_per_eu_2_4_num_vgpr_64() {}
 
-// expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
 __global__ void num_sgpr_32_num_vgpr_64() {}
 
-
-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {}
 
-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {}
 
-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {}
 
-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {}
 
-
-// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {}
 
-// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {}
+
+// expected-error@+2{{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel function}}
+__attribute__((reqd_work_group_size(32, 64, 64)))
+__global__ void reqd_work_group_size_32_64_64() {}
+
+// expected-error@+2{{attribute 'work_group_size_hint' can only be applied to an OpenCL kernel function}}
+__attribute__((work_group_size_hint(2, 2, 2)))
+__global__ void work_group_size_hint_2_2_2() {}
+
+// expected-error@+2{{attribute 'vec_type_hint' can only be applied to an OpenCL kernel function}}
+__attribute__((vec_type_hint(int)))
+__global__ void vec_type_hint_int() {}
+
+// expected-error@+2{{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel function}}
+__attribute__((intel_reqd_sub_group_size(64)))
+__global__ void intel_reqd_sub_group_size_64() {}
diff --git a/clang/test/SemaOpenCL/invalid-kernel-attrs.cl b/clang/test/SemaOpenCL/invalid-kernel-attrs.cl
index 1f1359cceead..a19a989f41bb 100644
--- a/clang/test/SemaOpenCL/invalid-kernel-attrs.cl
+++ b/clang/test/SemaOpenCL/invalid-kernel-attrs.cl
@@ -14,11 +14,11 @@ kernel __attribute__((work_group_size_hint(8,16,32,4))) void kernel6() {} //expe
 
 kernel __attribute__((work_group_size_hint(1,2,3))) __attribute__((work_group_size_hint(3,2,1))) void kernel7() {}  //expected-warning{{attribute 'work_group_size_hint' is already applied with different parameters}}
 
-__attribute__((reqd_work_group_size(8,16,32))) void kernel8(){} // expected-error {{attribute 'reqd_work_group_size' can only be applied to a kernel}}
+__attribute__((reqd_work_group_size(8,16,32))) void kernel8(){} // expected-error {{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel}}
 
-__attribute__((work_group_size_hint(8,16,32))) void kernel9(){} // expected-error {{attribute 'work_group_size_hint' can only be applied to a kernel}}
+__attribute__((work_group_size_hint(8,16,32))) void kernel9(){} // expected-error {{attribute 'work_group_size_hint' can only be applied to an OpenCL kernel}}
 
-__attribute__((vec_type_hint(char))) void kernel10(){} // expected-error {{attribute 'vec_type_hint' can only be applied to a kernel}}
+__attribute__((vec_type_hint(char))) void kernel10(){} // expected-error {{attribute 'vec_type_hint' can only be applied to an OpenCL kernel}}
 
 constant int foo1 __attribute__((reqd_work_group_size(8,16,32))) = 0; // expected-error {{'reqd_work_group_size' attribute only applies to functions}}
 
@@ -34,6 +34,6 @@ kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expecte
 kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 
-__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
+__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel}}
 kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}
 kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {}  //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}