[OpenCL] Mark group functions as convergent in opencl-c.h

Certain OpenCL builtin functions are supposed to be executed by all threads in a work group or sub group. Calls to such functions should not be made divergent by transformations, so it makes sense to mark them with the convergent attribute.

The addition of the convergent attribute is based on Ettore Speziale's work; the original proposal and patch can be found at https://www.mail-archive.com/cfe-commits@lists.llvm.org/msg22271.html.

Differential Revision: https://reviews.llvm.org/D25343

llvm-svn: 285725
This commit is contained in:
Yaxun Liu 2016-11-01 18:45:32 +00:00
parent cb578f84e0
commit 7d07ae7c85
7 changed files with 332 additions and 163 deletions

View File

@ -1026,6 +1026,12 @@ def NoDuplicate : InheritableAttr {
let Documentation = [NoDuplicateDocs];
}
// The `convergent` function attribute. It can be spelled either as the GNU
// attribute __attribute__((convergent)) or as the C++11 attribute
// [[clang::convergent]], is restricted to function subjects, and is described
// for users by ConvergentDocs.
def Convergent : InheritableAttr {
let Spellings = [GNU<"convergent">, CXX11<"clang", "convergent">];
let Subjects = SubjectList<[Function]>;
let Documentation = [ConvergentDocs];
}
def NoInline : InheritableAttr {
let Spellings = [GCC<"noinline">, Declspec<"noinline">];
let Subjects = SubjectList<[Function]>;

View File

@ -606,6 +606,33 @@ of the condition.
}];
}
// User-facing documentation for the `convergent` function attribute, filed
// under the DocCatFunction category. The Content string explains the mapping
// to the LLVM `convergent` attribute, contrasts it with `noduplicate`, and
// gives a usage sample.
def ConvergentDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``convergent`` attribute can be placed on a function declaration. It is
translated into the LLVM ``convergent`` attribute, which indicates that the call
instructions of a function with this attribute cannot be made control-dependent
on any additional values.
In languages designed for SPMD/SIMT programming model, e.g. OpenCL or CUDA,
the call instructions of a function with this attribute must be executed by
all work items or threads in a work group or sub group.
This attribute is different from ``noduplicate`` because it allows duplicating
function calls if it can be proved that the duplicated function calls are
not made control-dependent on any additional values, e.g., unrolling a loop
executed by all work items.
Sample usage:
.. code-block:: c
void convfunc(void) __attribute__((convergent));
// Setting it as a C++11 attribute is also valid in a C++ program.
// void convfunc(void) [[clang::convergent]];
}];
}
def NoSplitStackDocs : Documentation {
let Category = DocCatFunction;
let Content = [{

View File

@ -1648,6 +1648,8 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
if (TargetDecl->hasAttr<NoDuplicateAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(

View File

@ -17,6 +17,7 @@
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
// Shorthand for declaring an overloadable builtin.
#define __ovld __attribute__((overloadable))
// Shorthand for the convergent attribute; used on the barrier, work-group and
// sub-group builtin declarations in this header.
#define __conv __attribute__((convergent))
// Optimizations
#define __purefn __attribute__((pure))
@ -13822,7 +13823,7 @@ typedef uint cl_mem_fence_flags;
* image objects and then want to read the updated data.
*/
void __ovld barrier(cl_mem_fence_flags flags);
void __ovld __conv barrier(cl_mem_fence_flags flags);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -13835,8 +13836,8 @@ typedef enum memory_scope
memory_scope_sub_group
} memory_scope;
void __ovld work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
void __ovld work_group_barrier(cl_mem_fence_flags flags);
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions
@ -16568,101 +16569,101 @@ int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
// OpenCL v2.0 s6.13.15 - Work-group Functions
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
int __ovld work_group_all(int predicate);
int __ovld work_group_any(int predicate);
int __ovld __conv work_group_all(int predicate);
int __ovld __conv work_group_any(int predicate);
#ifdef cl_khr_fp16
half __ovld work_group_broadcast(half a, size_t local_id);
half __ovld work_group_broadcast(half a, size_t x, size_t y);
half __ovld work_group_broadcast(half a, size_t x, size_t y, size_t z);
half __ovld __conv work_group_broadcast(half a, size_t local_id);
half __ovld __conv work_group_broadcast(half a, size_t x, size_t y);
half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z);
#endif
int __ovld work_group_broadcast(int a, size_t local_id);
int __ovld work_group_broadcast(int a, size_t x, size_t y);
int __ovld work_group_broadcast(int a, size_t x, size_t y, size_t z);
uint __ovld work_group_broadcast(uint a, size_t local_id);
uint __ovld work_group_broadcast(uint a, size_t x, size_t y);
uint __ovld work_group_broadcast(uint a, size_t x, size_t y, size_t z);
long __ovld work_group_broadcast(long a, size_t local_id);
long __ovld work_group_broadcast(long a, size_t x, size_t y);
long __ovld work_group_broadcast(long a, size_t x, size_t y, size_t z);
ulong __ovld work_group_broadcast(ulong a, size_t local_id);
ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y);
ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
float __ovld work_group_broadcast(float a, size_t local_id);
float __ovld work_group_broadcast(float a, size_t x, size_t y);
float __ovld work_group_broadcast(float a, size_t x, size_t y, size_t z);
int __ovld __conv work_group_broadcast(int a, size_t local_id);
int __ovld __conv work_group_broadcast(int a, size_t x, size_t y);
int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z);
uint __ovld __conv work_group_broadcast(uint a, size_t local_id);
uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y);
uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z);
long __ovld __conv work_group_broadcast(long a, size_t local_id);
long __ovld __conv work_group_broadcast(long a, size_t x, size_t y);
long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z);
ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id);
ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y);
ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
float __ovld __conv work_group_broadcast(float a, size_t local_id);
float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);
float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);
#ifdef cl_khr_fp64
double __ovld work_group_broadcast(double a, size_t local_id);
double __ovld work_group_broadcast(double a, size_t x, size_t y);
double __ovld work_group_broadcast(double a, size_t x, size_t y, size_t z);
double __ovld __conv work_group_broadcast(double a, size_t local_id);
double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);
double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);
#endif //cl_khr_fp64
#ifdef cl_khr_fp16
half __ovld work_group_reduce_add(half x);
half __ovld work_group_reduce_min(half x);
half __ovld work_group_reduce_max(half x);
half __ovld work_group_scan_exclusive_add(half x);
half __ovld work_group_scan_exclusive_min(half x);
half __ovld work_group_scan_exclusive_max(half x);
half __ovld work_group_scan_inclusive_add(half x);
half __ovld work_group_scan_inclusive_min(half x);
half __ovld work_group_scan_inclusive_max(half x);
half __ovld __conv work_group_reduce_add(half x);
half __ovld __conv work_group_reduce_min(half x);
half __ovld __conv work_group_reduce_max(half x);
half __ovld __conv work_group_scan_exclusive_add(half x);
half __ovld __conv work_group_scan_exclusive_min(half x);
half __ovld __conv work_group_scan_exclusive_max(half x);
half __ovld __conv work_group_scan_inclusive_add(half x);
half __ovld __conv work_group_scan_inclusive_min(half x);
half __ovld __conv work_group_scan_inclusive_max(half x);
#endif
int __ovld work_group_reduce_add(int x);
int __ovld work_group_reduce_min(int x);
int __ovld work_group_reduce_max(int x);
int __ovld work_group_scan_exclusive_add(int x);
int __ovld work_group_scan_exclusive_min(int x);
int __ovld work_group_scan_exclusive_max(int x);
int __ovld work_group_scan_inclusive_add(int x);
int __ovld work_group_scan_inclusive_min(int x);
int __ovld work_group_scan_inclusive_max(int x);
uint __ovld work_group_reduce_add(uint x);
uint __ovld work_group_reduce_min(uint x);
uint __ovld work_group_reduce_max(uint x);
uint __ovld work_group_scan_exclusive_add(uint x);
uint __ovld work_group_scan_exclusive_min(uint x);
uint __ovld work_group_scan_exclusive_max(uint x);
uint __ovld work_group_scan_inclusive_add(uint x);
uint __ovld work_group_scan_inclusive_min(uint x);
uint __ovld work_group_scan_inclusive_max(uint x);
long __ovld work_group_reduce_add(long x);
long __ovld work_group_reduce_min(long x);
long __ovld work_group_reduce_max(long x);
long __ovld work_group_scan_exclusive_add(long x);
long __ovld work_group_scan_exclusive_min(long x);
long __ovld work_group_scan_exclusive_max(long x);
long __ovld work_group_scan_inclusive_add(long x);
long __ovld work_group_scan_inclusive_min(long x);
long __ovld work_group_scan_inclusive_max(long x);
ulong __ovld work_group_reduce_add(ulong x);
ulong __ovld work_group_reduce_min(ulong x);
ulong __ovld work_group_reduce_max(ulong x);
ulong __ovld work_group_scan_exclusive_add(ulong x);
ulong __ovld work_group_scan_exclusive_min(ulong x);
ulong __ovld work_group_scan_exclusive_max(ulong x);
ulong __ovld work_group_scan_inclusive_add(ulong x);
ulong __ovld work_group_scan_inclusive_min(ulong x);
ulong __ovld work_group_scan_inclusive_max(ulong x);
float __ovld work_group_reduce_add(float x);
float __ovld work_group_reduce_min(float x);
float __ovld work_group_reduce_max(float x);
float __ovld work_group_scan_exclusive_add(float x);
float __ovld work_group_scan_exclusive_min(float x);
float __ovld work_group_scan_exclusive_max(float x);
float __ovld work_group_scan_inclusive_add(float x);
float __ovld work_group_scan_inclusive_min(float x);
float __ovld work_group_scan_inclusive_max(float x);
int __ovld __conv work_group_reduce_add(int x);
int __ovld __conv work_group_reduce_min(int x);
int __ovld __conv work_group_reduce_max(int x);
int __ovld __conv work_group_scan_exclusive_add(int x);
int __ovld __conv work_group_scan_exclusive_min(int x);
int __ovld __conv work_group_scan_exclusive_max(int x);
int __ovld __conv work_group_scan_inclusive_add(int x);
int __ovld __conv work_group_scan_inclusive_min(int x);
int __ovld __conv work_group_scan_inclusive_max(int x);
uint __ovld __conv work_group_reduce_add(uint x);
uint __ovld __conv work_group_reduce_min(uint x);
uint __ovld __conv work_group_reduce_max(uint x);
uint __ovld __conv work_group_scan_exclusive_add(uint x);
uint __ovld __conv work_group_scan_exclusive_min(uint x);
uint __ovld __conv work_group_scan_exclusive_max(uint x);
uint __ovld __conv work_group_scan_inclusive_add(uint x);
uint __ovld __conv work_group_scan_inclusive_min(uint x);
uint __ovld __conv work_group_scan_inclusive_max(uint x);
long __ovld __conv work_group_reduce_add(long x);
long __ovld __conv work_group_reduce_min(long x);
long __ovld __conv work_group_reduce_max(long x);
long __ovld __conv work_group_scan_exclusive_add(long x);
long __ovld __conv work_group_scan_exclusive_min(long x);
long __ovld __conv work_group_scan_exclusive_max(long x);
long __ovld __conv work_group_scan_inclusive_add(long x);
long __ovld __conv work_group_scan_inclusive_min(long x);
long __ovld __conv work_group_scan_inclusive_max(long x);
ulong __ovld __conv work_group_reduce_add(ulong x);
ulong __ovld __conv work_group_reduce_min(ulong x);
ulong __ovld __conv work_group_reduce_max(ulong x);
ulong __ovld __conv work_group_scan_exclusive_add(ulong x);
ulong __ovld __conv work_group_scan_exclusive_min(ulong x);
ulong __ovld __conv work_group_scan_exclusive_max(ulong x);
ulong __ovld __conv work_group_scan_inclusive_add(ulong x);
ulong __ovld __conv work_group_scan_inclusive_min(ulong x);
ulong __ovld __conv work_group_scan_inclusive_max(ulong x);
float __ovld __conv work_group_reduce_add(float x);
float __ovld __conv work_group_reduce_min(float x);
float __ovld __conv work_group_reduce_max(float x);
float __ovld __conv work_group_scan_exclusive_add(float x);
float __ovld __conv work_group_scan_exclusive_min(float x);
float __ovld __conv work_group_scan_exclusive_max(float x);
float __ovld __conv work_group_scan_inclusive_add(float x);
float __ovld __conv work_group_scan_inclusive_min(float x);
float __ovld __conv work_group_scan_inclusive_max(float x);
#ifdef cl_khr_fp64
double __ovld work_group_reduce_add(double x);
double __ovld work_group_reduce_min(double x);
double __ovld work_group_reduce_max(double x);
double __ovld work_group_scan_exclusive_add(double x);
double __ovld work_group_scan_exclusive_min(double x);
double __ovld work_group_scan_exclusive_max(double x);
double __ovld work_group_scan_inclusive_add(double x);
double __ovld work_group_scan_inclusive_min(double x);
double __ovld work_group_scan_inclusive_max(double x);
double __ovld __conv work_group_reduce_add(double x);
double __ovld __conv work_group_reduce_min(double x);
double __ovld __conv work_group_reduce_max(double x);
double __ovld __conv work_group_scan_exclusive_add(double x);
double __ovld __conv work_group_scan_exclusive_min(double x);
double __ovld __conv work_group_scan_exclusive_max(double x);
double __ovld __conv work_group_scan_inclusive_add(double x);
double __ovld __conv work_group_scan_inclusive_min(double x);
double __ovld __conv work_group_scan_inclusive_max(double x);
#endif //cl_khr_fp64
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@ -16762,92 +16763,92 @@ uint __ovld get_enqueued_num_sub_groups(void);
uint __ovld get_sub_group_id(void);
uint __ovld get_sub_group_local_id(void);
void __ovld sub_group_barrier(cl_mem_fence_flags flags);
void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
void __ovld sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
int __ovld sub_group_all(int predicate);
int __ovld sub_group_any(int predicate);
int __ovld __conv sub_group_all(int predicate);
int __ovld __conv sub_group_any(int predicate);
int __ovld sub_group_broadcast(int x, uint sub_group_local_id);
uint __ovld sub_group_broadcast(uint x, uint sub_group_local_id);
long __ovld sub_group_broadcast(long x, uint sub_group_local_id);
ulong __ovld sub_group_broadcast(ulong x, uint sub_group_local_id);
float __ovld sub_group_broadcast(float x, uint sub_group_local_id);
int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id);
uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id);
long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id);
ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id);
float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id);
int __ovld sub_group_reduce_add(int x);
uint __ovld sub_group_reduce_add(uint x);
long __ovld sub_group_reduce_add(long x);
ulong __ovld sub_group_reduce_add(ulong x);
float __ovld sub_group_reduce_add(float x);
int __ovld sub_group_reduce_min(int x);
uint __ovld sub_group_reduce_min(uint x);
long __ovld sub_group_reduce_min(long x);
ulong __ovld sub_group_reduce_min(ulong x);
float __ovld sub_group_reduce_min(float x);
int __ovld sub_group_reduce_max(int x);
uint __ovld sub_group_reduce_max(uint x);
long __ovld sub_group_reduce_max(long x);
ulong __ovld sub_group_reduce_max(ulong x);
float __ovld sub_group_reduce_max(float x);
int __ovld __conv sub_group_reduce_add(int x);
uint __ovld __conv sub_group_reduce_add(uint x);
long __ovld __conv sub_group_reduce_add(long x);
ulong __ovld __conv sub_group_reduce_add(ulong x);
float __ovld __conv sub_group_reduce_add(float x);
int __ovld __conv sub_group_reduce_min(int x);
uint __ovld __conv sub_group_reduce_min(uint x);
long __ovld __conv sub_group_reduce_min(long x);
ulong __ovld __conv sub_group_reduce_min(ulong x);
float __ovld __conv sub_group_reduce_min(float x);
int __ovld __conv sub_group_reduce_max(int x);
uint __ovld __conv sub_group_reduce_max(uint x);
long __ovld __conv sub_group_reduce_max(long x);
ulong __ovld __conv sub_group_reduce_max(ulong x);
float __ovld __conv sub_group_reduce_max(float x);
int __ovld sub_group_scan_exclusive_add(int x);
uint __ovld sub_group_scan_exclusive_add(uint x);
long __ovld sub_group_scan_exclusive_add(long x);
ulong __ovld sub_group_scan_exclusive_add(ulong x);
float __ovld sub_group_scan_exclusive_add(float x);
int __ovld sub_group_scan_exclusive_min(int x);
uint __ovld sub_group_scan_exclusive_min(uint x);
long __ovld sub_group_scan_exclusive_min(long x);
ulong __ovld sub_group_scan_exclusive_min(ulong x);
float __ovld sub_group_scan_exclusive_min(float x);
int __ovld sub_group_scan_exclusive_max(int x);
uint __ovld sub_group_scan_exclusive_max(uint x);
long __ovld sub_group_scan_exclusive_max(long x);
ulong __ovld sub_group_scan_exclusive_max(ulong x);
float __ovld sub_group_scan_exclusive_max(float x);
int __ovld __conv sub_group_scan_exclusive_add(int x);
uint __ovld __conv sub_group_scan_exclusive_add(uint x);
long __ovld __conv sub_group_scan_exclusive_add(long x);
ulong __ovld __conv sub_group_scan_exclusive_add(ulong x);
float __ovld __conv sub_group_scan_exclusive_add(float x);
int __ovld __conv sub_group_scan_exclusive_min(int x);
uint __ovld __conv sub_group_scan_exclusive_min(uint x);
long __ovld __conv sub_group_scan_exclusive_min(long x);
ulong __ovld __conv sub_group_scan_exclusive_min(ulong x);
float __ovld __conv sub_group_scan_exclusive_min(float x);
int __ovld __conv sub_group_scan_exclusive_max(int x);
uint __ovld __conv sub_group_scan_exclusive_max(uint x);
long __ovld __conv sub_group_scan_exclusive_max(long x);
ulong __ovld __conv sub_group_scan_exclusive_max(ulong x);
float __ovld __conv sub_group_scan_exclusive_max(float x);
int __ovld sub_group_scan_inclusive_add(int x);
uint __ovld sub_group_scan_inclusive_add(uint x);
long __ovld sub_group_scan_inclusive_add(long x);
ulong __ovld sub_group_scan_inclusive_add(ulong x);
float __ovld sub_group_scan_inclusive_add(float x);
int __ovld sub_group_scan_inclusive_min(int x);
uint __ovld sub_group_scan_inclusive_min(uint x);
long __ovld sub_group_scan_inclusive_min(long x);
ulong __ovld sub_group_scan_inclusive_min(ulong x);
float __ovld sub_group_scan_inclusive_min(float x);
int __ovld sub_group_scan_inclusive_max(int x);
uint __ovld sub_group_scan_inclusive_max(uint x);
long __ovld sub_group_scan_inclusive_max(long x);
ulong __ovld sub_group_scan_inclusive_max(ulong x);
float __ovld sub_group_scan_inclusive_max(float x);
int __ovld __conv sub_group_scan_inclusive_add(int x);
uint __ovld __conv sub_group_scan_inclusive_add(uint x);
long __ovld __conv sub_group_scan_inclusive_add(long x);
ulong __ovld __conv sub_group_scan_inclusive_add(ulong x);
float __ovld __conv sub_group_scan_inclusive_add(float x);
int __ovld __conv sub_group_scan_inclusive_min(int x);
uint __ovld __conv sub_group_scan_inclusive_min(uint x);
long __ovld __conv sub_group_scan_inclusive_min(long x);
ulong __ovld __conv sub_group_scan_inclusive_min(ulong x);
float __ovld __conv sub_group_scan_inclusive_min(float x);
int __ovld __conv sub_group_scan_inclusive_max(int x);
uint __ovld __conv sub_group_scan_inclusive_max(uint x);
long __ovld __conv sub_group_scan_inclusive_max(long x);
ulong __ovld __conv sub_group_scan_inclusive_max(ulong x);
float __ovld __conv sub_group_scan_inclusive_max(float x);
#ifdef cl_khr_fp16
half __ovld sub_group_broadcast(half x, uint sub_group_local_id);
half __ovld sub_group_reduce_add(half x);
half __ovld sub_group_reduce_min(half x);
half __ovld sub_group_reduce_max(half x);
half __ovld sub_group_scan_exclusive_add(half x);
half __ovld sub_group_scan_exclusive_min(half x);
half __ovld sub_group_scan_exclusive_max(half x);
half __ovld sub_group_scan_inclusive_add(half x);
half __ovld sub_group_scan_inclusive_min(half x);
half __ovld sub_group_scan_inclusive_max(half x);
half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id);
half __ovld __conv sub_group_reduce_add(half x);
half __ovld __conv sub_group_reduce_min(half x);
half __ovld __conv sub_group_reduce_max(half x);
half __ovld __conv sub_group_scan_exclusive_add(half x);
half __ovld __conv sub_group_scan_exclusive_min(half x);
half __ovld __conv sub_group_scan_exclusive_max(half x);
half __ovld __conv sub_group_scan_inclusive_add(half x);
half __ovld __conv sub_group_scan_inclusive_min(half x);
half __ovld __conv sub_group_scan_inclusive_max(half x);
#endif //cl_khr_fp16
#ifdef cl_khr_fp64
double __ovld sub_group_broadcast(double x, uint sub_group_local_id);
double __ovld sub_group_reduce_add(double x);
double __ovld sub_group_reduce_min(double x);
double __ovld sub_group_reduce_max(double x);
double __ovld sub_group_scan_exclusive_add(double x);
double __ovld sub_group_scan_exclusive_min(double x);
double __ovld sub_group_scan_exclusive_max(double x);
double __ovld sub_group_scan_inclusive_add(double x);
double __ovld sub_group_scan_inclusive_min(double x);
double __ovld sub_group_scan_inclusive_max(double x);
double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);
double __ovld __conv sub_group_reduce_add(double x);
double __ovld __conv sub_group_reduce_min(double x);
double __ovld __conv sub_group_reduce_max(double x);
double __ovld __conv sub_group_scan_exclusive_add(double x);
double __ovld __conv sub_group_scan_exclusive_min(double x);
double __ovld __conv sub_group_scan_exclusive_max(double x);
double __ovld __conv sub_group_scan_inclusive_add(double x);
double __ovld __conv sub_group_scan_inclusive_min(double x);
double __ovld __conv sub_group_scan_inclusive_max(double x);
#endif //cl_khr_fp64
#endif //cl_khr_subgroups cl_intel_subgroups

View File

@ -5857,6 +5857,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_NoDuplicate:
handleSimpleAttribute<NoDuplicateAttr>(S, D, Attr);
break;
case AttributeList::AT_Convergent:
handleSimpleAttribute<ConvergentAttr>(S, D, Attr);
break;
case AttributeList::AT_NoInline:
handleSimpleAttribute<NoInlineAttr>(S, D, Attr);
break;

View File

@ -0,0 +1,118 @@
// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | FileCheck %s
void convfun(void) __attribute__((convergent));
void non_convfun(void);
void nodupfun(void) __attribute__((noduplicate));
void f(void);
void g(void);
// Test two if's are merged and non_convfun duplicated.
// The LLVM IR is equivalent to:
// if (a) {
// f();
// non_convfun();
// g();
// } else {
// non_convfun();
// }
//
// CHECK: define spir_func void @test_merge_if(i32 %[[a:.+]])
// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
// CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
// CHECK: [[if_then]]:
// CHECK: tail call spir_func void @f()
// CHECK: tail call spir_func void @non_convfun()
// CHECK: tail call spir_func void @g()
// CHECK: br label %[[if_end3:.+]]
// CHECK: [[if_end3_critedge]]:
// CHECK: tail call spir_func void @non_convfun()
// CHECK: br label %[[if_end3]]
// CHECK: [[if_end3]]:
// CHECK-LABEL: ret void
// Two `if (a)` statements with the same condition, separated by a call to a
// function that is declared without the convergent attribute.
void test_merge_if(int a) {
if (a) {
f();
}
non_convfun();
if (a) {
g();
}
}
// CHECK-DAG: declare spir_func void @f()
// CHECK-DAG: declare spir_func void @non_convfun()
// CHECK-DAG: declare spir_func void @g()
// Test two if's are not merged.
// The call between the two `if (a)` statements targets a function declared
// with the convergent attribute, so it has to stay in a block of its own that
// is reached on both paths; the second condition is re-tested through a phi.
// CHECK: define spir_func void @test_no_merge_if(i32 %[[a:.+]])
// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
// CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
// CHECK: [[if_then]]:
// CHECK: tail call spir_func void @f()
// CHECK-NOT: call spir_func void @convfun()
// CHECK-NOT: call spir_func void @g()
// CHECK: br label %[[if_end]]
// CHECK: [[if_end]]:
// CHECK: %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
// CHECK: tail call spir_func void @convfun() #[[attr5:.+]]
// CHECK: br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
// CHECK: [[if_then2]]:
// CHECK: tail call spir_func void @g()
// The next line uses the label name bound by the conditional branch above
// rather than binding it again, so both branches must reach the same block.
// CHECK: br label %[[if_end3]]
// CHECK: [[if_end3]]:
// CHECK-LABEL: ret void
void test_no_merge_if(int a) {
if (a) {
f();
}
convfun();
if(a) {
g();
}
}
// CHECK: declare spir_func void @convfun(){{[^#]*}} #[[attr2:[0-9]+]]
// Test loop is unrolled for convergent function.
// CHECK-LABEL: define spir_func void @test_unroll()
// CHECK: tail call spir_func void @convfun() #[[attr5:[0-9]+]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK: tail call spir_func void @convfun() #[[attr5]]
// CHECK-LABEL: ret void
// Loop with a constant trip count of 10 whose body is a single call to the
// convergent function; the expectations above look for ten separate calls.
void test_unroll() {
for (int i = 0; i < 10; i++)
convfun();
}
// Test loop is not unrolled for noduplicate function.
// CHECK-LABEL: define spir_func void @test_not_unroll()
// CHECK: br label %[[for_body:.+]]
// CHECK: [[for_cond_cleanup:.+]]:
// CHECK: ret void
// CHECK: [[for_body]]:
// CHECK: tail call spir_func void @nodupfun() #[[attr6:[0-9]+]]
// CHECK-NOT: call spir_func void @nodupfun()
// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
// Same loop shape as test_unroll, but calling the noduplicate function; the
// expectations above require exactly one call inside a loop body that
// branches back to itself.
void test_not_unroll() {
for (int i = 0; i < 10; i++)
nodupfun();
}
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
// CHECK-DAG: attributes #[[attr2]] = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK-DAG: attributes #[[attr3]] = { {{[^}]*}}noduplicate{{[^}]*}} }
// CHECK-DAG: attributes #[[attr5]] = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK-DAG: attributes #[[attr6]] = { {{[^}]*}}noduplicate{{[^}]*}} }

View File

@ -0,0 +1,12 @@
// RUN: %clang_cc1 -triple spir-unknown-unknown -fsyntax-only -verify %s
void f1(void) __attribute__((convergent));
void f2(void) __attribute__((convergent(1))); // expected-error {{'convergent' attribute takes no arguments}}
void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions}}
// Applying the attribute to a local variable should be diagnosed with the
// same "only applies to functions" warning as the parameter case in f3.
void f4(void) {
int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions}}
}