[CUDA][HIP] Allow CUDA __global__ functions to have amdgpu kernel attributes

There are HIP applications, e.g. Tensorflow 1.3, that use amdgpu kernel attributes;
however, these attributes are currently only allowed on OpenCL kernel functions.

This patch allows amdgpu kernel attributes to be applied to CUDA/HIP __global__
functions.
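
As a minimal sketch of what this enables (the kernel below and its launch bounds
are illustrative, not part of the patch), a HIP/CUDA translation unit may now
annotate a __global__ function directly with these attributes:

  #include <hip/hip_runtime.h>

  // Hypothetical kernel; before this patch the amdgpu attributes below were
  // rejected on __global__ functions with "attribute only applies to kernel
  // functions".
  __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2)))
  __global__ void scale(float *out, const float *in, float factor, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x; // one element per thread
    if (i < n)
      out[i] = in[i] * factor;
  }

On non-AMDGPU targets the attributes are silently accepted and no amdgpu-*
function attributes are emitted, which the new codegen test below checks with
the NAMD prefix.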

Differential Revision: https://reviews.llvm.org/D47958

llvm-svn: 334561
Yaxun Liu 2018-06-12 23:58:59 +00:00
parent e6fdb6a28c
commit aa24601f98
5 changed files with 76 additions and 67 deletions


@@ -8435,7 +8435,7 @@ def err_reference_pipe_type : Error <
   "pipes packet types cannot be of reference type">;
 def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 'main'">;
 def err_opencl_kernel_attr :
-  Error<"attribute %0 can only be applied to a kernel function">;
+  Error<"attribute %0 can only be applied to an OpenCL kernel function">;
 def err_opencl_return_value_with_address_space : Error<
   "return value cannot be qualified with address space">;
 def err_opencl_constant_no_init : Error<


@@ -6468,25 +6468,27 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
     } else if (const auto *A = D->getAttr<VecTypeHintAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-          << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-          << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-          << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-          << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
     } else if (const auto *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
+    } else if (!D->hasAttr<CUDAGlobalAttr>()) {
+      if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      }
     }
   }
 }


@@ -0,0 +1,37 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
// RUN: -check-prefix=NAMD
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
// RUN: -verify -o - %s | FileCheck -check-prefix=NAMD %s
#include "Inputs/cuda.h"
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
__global__ void flat_work_group_size_32_64() {
// CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
__global__ void waves_per_eu_2() {
// CHECK: define amdgpu_kernel void @_Z14waves_per_eu_2v() [[WAVES_PER_EU_2:#[0-9]+]]
}
__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
__global__ void num_sgpr_32() {
// CHECK: define amdgpu_kernel void @_Z11num_sgpr_32v() [[NUM_SGPR_32:#[0-9]+]]
}
__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
__global__ void num_vgpr_64() {
// CHECK: define amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]
}
// Make sure this is silently accepted on other targets.
// NAMD-NOT: "amdgpu-flat-work-group-size"
// NAMD-NOT: "amdgpu-waves-per-eu"
// NAMD-NOT: "amdgpu-num-vgpr"
// NAMD-NOT: "amdgpu-num-sgpr"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"
// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"


@@ -1,110 +1,80 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s
#include "Inputs/cuda.h"
// expected-error@+2 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64)))
__global__ void flat_work_group_size_32_64() {}
// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
__attribute__((amdgpu_waves_per_eu(2)))
__global__ void waves_per_eu_2() {}
// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
__attribute__((amdgpu_waves_per_eu(2, 4)))
__global__ void waves_per_eu_2_4() {}
// expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_num_sgpr(32)))
__global__ void num_sgpr_32() {}
// expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_num_vgpr(64)))
__global__ void num_vgpr_64() {}
// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2() {}
// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2_4() {}
// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32)))
__global__ void flat_work_group_size_32_64_num_sgpr_32() {}
// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64)))
__global__ void flat_work_group_size_32_64_num_vgpr_64() {}
// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
__global__ void waves_per_eu_2_num_sgpr_32() {}
// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
__global__ void waves_per_eu_2_num_vgpr_64() {}
// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
__global__ void waves_per_eu_2_4_num_sgpr_32() {}
// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
__global__ void waves_per_eu_2_4_num_vgpr_64() {}
// expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
__global__ void num_sgpr_32_num_vgpr_64() {}
// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {}
// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {}
// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {}
// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {}
// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {}
// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {}
// expected-error@+2{{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel function}}
__attribute__((reqd_work_group_size(32, 64, 64)))
__global__ void reqd_work_group_size_32_64_64() {}
// expected-error@+2{{attribute 'work_group_size_hint' can only be applied to an OpenCL kernel function}}
__attribute__((work_group_size_hint(2, 2, 2)))
__global__ void work_group_size_hint_2_2_2() {}
// expected-error@+2{{attribute 'vec_type_hint' can only be applied to an OpenCL kernel function}}
__attribute__((vec_type_hint(int)))
__global__ void vec_type_hint_int() {}
// expected-error@+2{{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel function}}
__attribute__((intel_reqd_sub_group_size(64)))
__global__ void intel_reqd_sub_group_size_64() {}


@@ -14,11 +14,11 @@ kernel __attribute__((work_group_size_hint(8,16,32,4))) void kernel6() {} //expe
 kernel __attribute__((work_group_size_hint(1,2,3))) __attribute__((work_group_size_hint(3,2,1))) void kernel7() {} //expected-warning{{attribute 'work_group_size_hint' is already applied with different parameters}}
-__attribute__((reqd_work_group_size(8,16,32))) void kernel8(){} // expected-error {{attribute 'reqd_work_group_size' can only be applied to a kernel}}
+__attribute__((reqd_work_group_size(8,16,32))) void kernel8(){} // expected-error {{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel}}
-__attribute__((work_group_size_hint(8,16,32))) void kernel9(){} // expected-error {{attribute 'work_group_size_hint' can only be applied to a kernel}}
+__attribute__((work_group_size_hint(8,16,32))) void kernel9(){} // expected-error {{attribute 'work_group_size_hint' can only be applied to an OpenCL kernel}}
-__attribute__((vec_type_hint(char))) void kernel10(){} // expected-error {{attribute 'vec_type_hint' can only be applied to a kernel}}
+__attribute__((vec_type_hint(char))) void kernel10(){} // expected-error {{attribute 'vec_type_hint' can only be applied to an OpenCL kernel}}
 constant int foo1 __attribute__((reqd_work_group_size(8,16,32))) = 0; // expected-error {{'reqd_work_group_size' attribute only applies to functions}}
@@ -34,6 +34,6 @@ kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expecte
 kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
-__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
+__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel}}
 kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}
 kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {} //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}