diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 3c3f06c5ea3a..7e9b5eea8325 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -61,248 +61,506 @@ BUILTIN(__builtin_ptx_bar_sync, "vi", "n") // Builtins exposed as part of NVVM -BUILTIN(__syncthreads, "v", "n") -BUILTIN(__nvvm_bar0, "v", "n") -BUILTIN(__nvvm_bar0_popc, "ii", "n") -BUILTIN(__nvvm_bar0_and, "ii", "n") -BUILTIN(__nvvm_bar0_or, "ii", "n") -BUILTIN(__nvvm_membar_cta, "v", "n") -BUILTIN(__nvvm_membar_gl, "v", "n") -BUILTIN(__nvvm_membar_sys, "v", "n") -BUILTIN(__nvvm_popc_i, "ii", "nc") -BUILTIN(__nvvm_popc_ll, "LiLi", "nc") -BUILTIN(__nvvm_prmt, "UiUiUiUi", "nc") -BUILTIN(__nvvm_min_i, "iii", "nc") -BUILTIN(__nvvm_min_ui, "UiUiUi", "nc") -BUILTIN(__nvvm_min_ll, "LLiLLiLLi", "nc") -BUILTIN(__nvvm_min_ull, "ULLiULLiULLi", "nc") -BUILTIN(__nvvm_max_i, "iii", "nc") -BUILTIN(__nvvm_max_ui, "UiUiUi", "nc") -BUILTIN(__nvvm_max_ll, "LLiLLiLLi", "nc") -BUILTIN(__nvvm_max_ull, "ULLiULLiULLi", "nc") -BUILTIN(__nvvm_mulhi_i, "iii", "nc") -BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "nc") -BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "nc") -BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "nc") -BUILTIN(__nvvm_mul24_i, "iii", "nc") -BUILTIN(__nvvm_mul24_ui, "UiUiUi", "nc") -BUILTIN(__nvvm_brev32, "UiUi", "nc") -BUILTIN(__nvvm_brev64, "ULLiULLi", "nc") -BUILTIN(__nvvm_sad_i, "iiii", "nc") -BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "nc") -BUILTIN(__nvvm_abs_i, "ii", "nc") -BUILTIN(__nvvm_abs_ll, "LiLi", "nc") -BUILTIN(__nvvm_floor_ftz_f, "ff", "nc") -BUILTIN(__nvvm_floor_f, "ff", "nc") -BUILTIN(__nvvm_floor_d, "dd", "nc") -BUILTIN(__nvvm_fabs_ftz_f, "ff", "nc") -BUILTIN(__nvvm_fabs_f, "ff", "nc") -BUILTIN(__nvvm_fabs_d, "dd", "nc") -BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "nc") -BUILTIN(__nvvm_fmin_ftz_f, "fff", "nc") -BUILTIN(__nvvm_fmin_f, "fff", "nc") -BUILTIN(__nvvm_fmax_ftz_f, "fff", "nc") -BUILTIN(__nvvm_fmax_f, "fff", "nc") 
-BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "nc") -BUILTIN(__nvvm_rsqrt_approx_f, "ff", "nc") -BUILTIN(__nvvm_fmin_d, "ddd", "nc") -BUILTIN(__nvvm_fmax_d, "ddd", "nc") -BUILTIN(__nvvm_rsqrt_approx_d, "dd", "nc") -BUILTIN(__nvvm_ceil_d, "dd", "nc") -BUILTIN(__nvvm_trunc_d, "dd", "nc") -BUILTIN(__nvvm_round_d, "dd", "nc") -BUILTIN(__nvvm_ex2_approx_d, "dd", "nc") -BUILTIN(__nvvm_lg2_approx_d, "dd", "nc") -BUILTIN(__nvvm_round_ftz_f, "ff", "nc") -BUILTIN(__nvvm_round_f, "ff", "nc") -BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "nc") -BUILTIN(__nvvm_ex2_approx_f, "ff", "nc") -BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "nc") -BUILTIN(__nvvm_lg2_approx_f, "ff", "nc") -BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "nc") -BUILTIN(__nvvm_sin_approx_f, "ff", "nc") -BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "nc") -BUILTIN(__nvvm_cos_approx_f, "ff", "nc") -BUILTIN(__nvvm_trunc_ftz_f, "ff", "nc") -BUILTIN(__nvvm_trunc_f, "ff", "nc") -BUILTIN(__nvvm_ceil_ftz_f, "ff", "nc") -BUILTIN(__nvvm_ceil_f, "ff", "nc") -BUILTIN(__nvvm_saturate_d, "dd", "nc") -BUILTIN(__nvvm_saturate_ftz_f, "ff", "nc") -BUILTIN(__nvvm_saturate_f, "ff", "nc") -BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rn_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rz_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rm_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rp_f, "ffff", "nc") -BUILTIN(__nvvm_fma_rn_d, "dddd", "nc") -BUILTIN(__nvvm_fma_rz_d, "dddd", "nc") -BUILTIN(__nvvm_fma_rm_d, "dddd", "nc") -BUILTIN(__nvvm_fma_rp_d, "dddd", "nc") -BUILTIN(__nvvm_div_approx_ftz_f, "fff", "nc") -BUILTIN(__nvvm_div_approx_f, "fff", "nc") -BUILTIN(__nvvm_div_rn_ftz_f, "fff", "nc") -BUILTIN(__nvvm_div_rn_f, "fff", "nc") -BUILTIN(__nvvm_div_rz_ftz_f, "fff", "nc") -BUILTIN(__nvvm_div_rz_f, "fff", "nc") -BUILTIN(__nvvm_div_rm_ftz_f, "fff", "nc") -BUILTIN(__nvvm_div_rm_f, "fff", "nc") -BUILTIN(__nvvm_div_rp_ftz_f, "fff", "nc") 
-BUILTIN(__nvvm_div_rp_f, "fff", "nc") -BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "nc") -BUILTIN(__nvvm_rcp_rn_f, "ff", "nc") -BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "nc") -BUILTIN(__nvvm_rcp_rz_f, "ff", "nc") -BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "nc") -BUILTIN(__nvvm_rcp_rm_f, "ff", "nc") -BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "nc") -BUILTIN(__nvvm_rcp_rp_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rn_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rz_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rm_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_rp_f, "ff", "nc") -BUILTIN(__nvvm_div_rn_d, "ddd", "nc") -BUILTIN(__nvvm_div_rz_d, "ddd", "nc") -BUILTIN(__nvvm_div_rm_d, "ddd", "nc") -BUILTIN(__nvvm_div_rp_d, "ddd", "nc") -BUILTIN(__nvvm_rcp_rn_d, "dd", "nc") -BUILTIN(__nvvm_rcp_rz_d, "dd", "nc") -BUILTIN(__nvvm_rcp_rm_d, "dd", "nc") -BUILTIN(__nvvm_rcp_rp_d, "dd", "nc") -BUILTIN(__nvvm_sqrt_rn_d, "dd", "nc") -BUILTIN(__nvvm_sqrt_rz_d, "dd", "nc") -BUILTIN(__nvvm_sqrt_rm_d, "dd", "nc") -BUILTIN(__nvvm_sqrt_rp_d, "dd", "nc") -BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "nc") -BUILTIN(__nvvm_sqrt_approx_f, "ff", "nc") -BUILTIN(__nvvm_add_rn_d, "ddd", "nc") -BUILTIN(__nvvm_add_rz_d, "ddd", "nc") -BUILTIN(__nvvm_add_rm_d, "ddd", "nc") -BUILTIN(__nvvm_add_rp_d, "ddd", "nc") -BUILTIN(__nvvm_mul_rn_d, "ddd", "nc") -BUILTIN(__nvvm_mul_rz_d, "ddd", "nc") -BUILTIN(__nvvm_mul_rm_d, "ddd", "nc") -BUILTIN(__nvvm_mul_rp_d, "ddd", "nc") -BUILTIN(__nvvm_add_rm_ftz_f, "fff", "nc") -BUILTIN(__nvvm_add_rm_f, "fff", "nc") -BUILTIN(__nvvm_add_rp_ftz_f, "fff", "nc") -BUILTIN(__nvvm_add_rp_f, "fff", "nc") -BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "nc") -BUILTIN(__nvvm_mul_rm_f, "fff", "nc") -BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "nc") -BUILTIN(__nvvm_mul_rp_f, "fff", "nc") -BUILTIN(__nvvm_add_rn_ftz_f, "fff", "nc") -BUILTIN(__nvvm_add_rn_f, "fff", "nc") -BUILTIN(__nvvm_add_rz_ftz_f, "fff", 
"nc") -BUILTIN(__nvvm_add_rz_f, "fff", "nc") -BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "nc") -BUILTIN(__nvvm_mul_rn_f, "fff", "nc") -BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "nc") -BUILTIN(__nvvm_mul_rz_f, "fff", "nc") -BUILTIN(__nvvm_d2f_rn_ftz, "fd", "nc") -BUILTIN(__nvvm_d2f_rn, "fd", "nc") -BUILTIN(__nvvm_d2f_rz_ftz, "fd", "nc") -BUILTIN(__nvvm_d2f_rz, "fd", "nc") -BUILTIN(__nvvm_d2f_rm_ftz, "fd", "nc") -BUILTIN(__nvvm_d2f_rm, "fd", "nc") -BUILTIN(__nvvm_d2f_rp_ftz, "fd", "nc") -BUILTIN(__nvvm_d2f_rp, "fd", "nc") -BUILTIN(__nvvm_d2i_rn, "id", "nc") -BUILTIN(__nvvm_d2i_rz, "id", "nc") -BUILTIN(__nvvm_d2i_rm, "id", "nc") -BUILTIN(__nvvm_d2i_rp, "id", "nc") -BUILTIN(__nvvm_d2ui_rn, "Uid", "nc") -BUILTIN(__nvvm_d2ui_rz, "Uid", "nc") -BUILTIN(__nvvm_d2ui_rm, "Uid", "nc") -BUILTIN(__nvvm_d2ui_rp, "Uid", "nc") -BUILTIN(__nvvm_i2d_rn, "di", "nc") -BUILTIN(__nvvm_i2d_rz, "di", "nc") -BUILTIN(__nvvm_i2d_rm, "di", "nc") -BUILTIN(__nvvm_i2d_rp, "di", "nc") -BUILTIN(__nvvm_ui2d_rn, "dUi", "nc") -BUILTIN(__nvvm_ui2d_rz, "dUi", "nc") -BUILTIN(__nvvm_ui2d_rm, "dUi", "nc") -BUILTIN(__nvvm_ui2d_rp, "dUi", "nc") -BUILTIN(__nvvm_f2i_rn_ftz, "if", "nc") -BUILTIN(__nvvm_f2i_rn, "if", "nc") -BUILTIN(__nvvm_f2i_rz_ftz, "if", "nc") -BUILTIN(__nvvm_f2i_rz, "if", "nc") -BUILTIN(__nvvm_f2i_rm_ftz, "if", "nc") -BUILTIN(__nvvm_f2i_rm, "if", "nc") -BUILTIN(__nvvm_f2i_rp_ftz, "if", "nc") -BUILTIN(__nvvm_f2i_rp, "if", "nc") -BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "nc") -BUILTIN(__nvvm_f2ui_rn, "Uif", "nc") -BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "nc") -BUILTIN(__nvvm_f2ui_rz, "Uif", "nc") -BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "nc") -BUILTIN(__nvvm_f2ui_rm, "Uif", "nc") -BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "nc") -BUILTIN(__nvvm_f2ui_rp, "Uif", "nc") -BUILTIN(__nvvm_i2f_rn, "fi", "nc") -BUILTIN(__nvvm_i2f_rz, "fi", "nc") -BUILTIN(__nvvm_i2f_rm, "fi", "nc") -BUILTIN(__nvvm_i2f_rp, "fi", "nc") -BUILTIN(__nvvm_ui2f_rn, "fUi", "nc") -BUILTIN(__nvvm_ui2f_rz, "fUi", "nc") -BUILTIN(__nvvm_ui2f_rm, "fUi", "nc") 
-BUILTIN(__nvvm_ui2f_rp, "fUi", "nc") -BUILTIN(__nvvm_lohi_i2d, "dii", "nc") -BUILTIN(__nvvm_d2i_lo, "id", "nc") -BUILTIN(__nvvm_d2i_hi, "id", "nc") -BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "nc") -BUILTIN(__nvvm_f2ll_rn, "LLif", "nc") -BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "nc") -BUILTIN(__nvvm_f2ll_rz, "LLif", "nc") -BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "nc") -BUILTIN(__nvvm_f2ll_rm, "LLif", "nc") -BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "nc") -BUILTIN(__nvvm_f2ll_rp, "LLif", "nc") -BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "nc") -BUILTIN(__nvvm_f2ull_rn, "ULLif", "nc") -BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "nc") -BUILTIN(__nvvm_f2ull_rz, "ULLif", "nc") -BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "nc") -BUILTIN(__nvvm_f2ull_rm, "ULLif", "nc") -BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "nc") -BUILTIN(__nvvm_f2ull_rp, "ULLif", "nc") -BUILTIN(__nvvm_d2ll_rn, "LLid", "nc") -BUILTIN(__nvvm_d2ll_rz, "LLid", "nc") -BUILTIN(__nvvm_d2ll_rm, "LLid", "nc") -BUILTIN(__nvvm_d2ll_rp, "LLid", "nc") -BUILTIN(__nvvm_d2ull_rn, "ULLid", "nc") -BUILTIN(__nvvm_d2ull_rz, "ULLid", "nc") -BUILTIN(__nvvm_d2ull_rm, "ULLid", "nc") -BUILTIN(__nvvm_d2ull_rp, "ULLid", "nc") -BUILTIN(__nvvm_ll2f_rn, "fLLi", "nc") -BUILTIN(__nvvm_ll2f_rz, "fLLi", "nc") -BUILTIN(__nvvm_ll2f_rm, "fLLi", "nc") -BUILTIN(__nvvm_ll2f_rp, "fLLi", "nc") -BUILTIN(__nvvm_ull2f_rn, "fULLi", "nc") -BUILTIN(__nvvm_ull2f_rz, "fULLi", "nc") -BUILTIN(__nvvm_ull2f_rm, "fULLi", "nc") -BUILTIN(__nvvm_ull2f_rp, "fULLi", "nc") -BUILTIN(__nvvm_ll2d_rn, "dLLi", "nc") -BUILTIN(__nvvm_ll2d_rz, "dLLi", "nc") -BUILTIN(__nvvm_ll2d_rm, "dLLi", "nc") -BUILTIN(__nvvm_ll2d_rp, "dLLi", "nc") -BUILTIN(__nvvm_ull2d_rn, "dULLi", "nc") -BUILTIN(__nvvm_ull2d_rz, "dULLi", "nc") -BUILTIN(__nvvm_ull2d_rm, "dULLi", "nc") -BUILTIN(__nvvm_ull2d_rp, "dULLi", "nc") -BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "nc") -BUILTIN(__nvvm_f2h_rn, "Usf", "nc") -BUILTIN(__nvvm_h2f, "fUs", "nc") -BUILTIN(__nvvm_bitcast_i2f, "fi", "nc") -BUILTIN(__nvvm_bitcast_f2i, "if", "nc") 
-BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "nc") -BUILTIN(__nvvm_bitcast_d2ll, "LLid", "nc") +// MISC + +BUILTIN(__nvvm_clz_i, "ii", "") +BUILTIN(__nvvm_clz_ll, "iLLi", "") +BUILTIN(__nvvm_popc_i, "ii", "") +BUILTIN(__nvvm_popc_ll, "iLLi", "") +BUILTIN(__nvvm_prmt, "UiUiUiUi", "") + +// Min Max + +BUILTIN(__nvvm_min_i, "iii", "") +BUILTIN(__nvvm_min_ui, "UiUiUi", "") +BUILTIN(__nvvm_min_ll, "LLiLLiLLi", "") +BUILTIN(__nvvm_min_ull, "ULLiULLiULLi", "") + +BUILTIN(__nvvm_max_i, "iii", "") +BUILTIN(__nvvm_max_ui, "UiUiUi", "") +BUILTIN(__nvvm_max_ll, "LLiLLiLLi", "") +BUILTIN(__nvvm_max_ull, "ULLiULLiULLi", "") + +BUILTIN(__nvvm_fmax_ftz_f, "fff", "") +BUILTIN(__nvvm_fmax_f, "fff", "") +BUILTIN(__nvvm_fmin_ftz_f, "fff", "") +BUILTIN(__nvvm_fmin_f, "fff", "") + +BUILTIN(__nvvm_fmax_d, "ddd", "") +BUILTIN(__nvvm_fmin_d, "ddd", "") + +// Multiplication + +BUILTIN(__nvvm_mulhi_i, "iii", "") +BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "") +BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "") +BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "") + +BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "") +BUILTIN(__nvvm_mul_rn_f, "fff", "") +BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "") +BUILTIN(__nvvm_mul_rz_f, "fff", "") +BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "") +BUILTIN(__nvvm_mul_rm_f, "fff", "") +BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "") +BUILTIN(__nvvm_mul_rp_f, "fff", "") + +BUILTIN(__nvvm_mul_rn_d, "ddd", "") +BUILTIN(__nvvm_mul_rz_d, "ddd", "") +BUILTIN(__nvvm_mul_rm_d, "ddd", "") +BUILTIN(__nvvm_mul_rp_d, "ddd", "") + +BUILTIN(__nvvm_mul24_i, "iii", "") +BUILTIN(__nvvm_mul24_ui, "UiUiUi", "") + +// Div + +BUILTIN(__nvvm_div_approx_ftz_f, "fff", "") +BUILTIN(__nvvm_div_approx_f, "fff", "") + +BUILTIN(__nvvm_div_rn_ftz_f, "fff", "") +BUILTIN(__nvvm_div_rn_f, "fff", "") +BUILTIN(__nvvm_div_rz_ftz_f, "fff", "") +BUILTIN(__nvvm_div_rz_f, "fff", "") +BUILTIN(__nvvm_div_rm_ftz_f, "fff", "") +BUILTIN(__nvvm_div_rm_f, "fff", "") +BUILTIN(__nvvm_div_rp_ftz_f, "fff", "") +BUILTIN(__nvvm_div_rp_f, "fff", "") + 
+BUILTIN(__nvvm_div_rn_d, "ddd", "") +BUILTIN(__nvvm_div_rz_d, "ddd", "") +BUILTIN(__nvvm_div_rm_d, "ddd", "") +BUILTIN(__nvvm_div_rp_d, "ddd", "") + +// Brev + +BUILTIN(__nvvm_brev32, "UiUi", "") +BUILTIN(__nvvm_brev64, "ULLiULLi", "") + +// Sad + +BUILTIN(__nvvm_sad_i, "iiii", "") +BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "") + +// Floor, Ceil + +BUILTIN(__nvvm_floor_ftz_f, "ff", "") +BUILTIN(__nvvm_floor_f, "ff", "") +BUILTIN(__nvvm_floor_d, "dd", "") + +BUILTIN(__nvvm_ceil_ftz_f, "ff", "") +BUILTIN(__nvvm_ceil_f, "ff", "") +BUILTIN(__nvvm_ceil_d, "dd", "") + +// Abs + +BUILTIN(__nvvm_abs_i, "ii", "") +BUILTIN(__nvvm_abs_ll, "LLiLLi", "") + +BUILTIN(__nvvm_fabs_ftz_f, "ff", "") +BUILTIN(__nvvm_fabs_f, "ff", "") +BUILTIN(__nvvm_fabs_d, "dd", "") + +// Round + +BUILTIN(__nvvm_round_ftz_f, "ff", "") +BUILTIN(__nvvm_round_f, "ff", "") +BUILTIN(__nvvm_round_d, "dd", "") + +// Trunc + +BUILTIN(__nvvm_trunc_ftz_f, "ff", "") +BUILTIN(__nvvm_trunc_f, "ff", "") +BUILTIN(__nvvm_trunc_d, "dd", "") + +// Saturate + +BUILTIN(__nvvm_saturate_ftz_f, "ff", "") +BUILTIN(__nvvm_saturate_f, "ff", "") +BUILTIN(__nvvm_saturate_d, "dd", "") + +// Exp2, Log2 + +BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "") +BUILTIN(__nvvm_ex2_approx_f, "ff", "") +BUILTIN(__nvvm_ex2_approx_d, "dd", "") + +BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "") +BUILTIN(__nvvm_lg2_approx_f, "ff", "") +BUILTIN(__nvvm_lg2_approx_d, "dd", "") + +// Sin, Cos + +BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "") +BUILTIN(__nvvm_sin_approx_f, "ff", "") + +BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "") +BUILTIN(__nvvm_cos_approx_f, "ff", "") + +// Fma + +BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "") +BUILTIN(__nvvm_fma_rn_f, "ffff", "") +BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "") +BUILTIN(__nvvm_fma_rz_f, "ffff", "") +BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "") +BUILTIN(__nvvm_fma_rm_f, "ffff", "") +BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "") +BUILTIN(__nvvm_fma_rp_f, "ffff", "") +BUILTIN(__nvvm_fma_rn_d, "dddd", "") +BUILTIN(__nvvm_fma_rz_d, "dddd", "") 
+BUILTIN(__nvvm_fma_rm_d, "dddd", "") +BUILTIN(__nvvm_fma_rp_d, "dddd", "") + +// Rcp + +BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "") +BUILTIN(__nvvm_rcp_rn_f, "ff", "") +BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "") +BUILTIN(__nvvm_rcp_rz_f, "ff", "") +BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "") +BUILTIN(__nvvm_rcp_rm_f, "ff", "") +BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "") +BUILTIN(__nvvm_rcp_rp_f, "ff", "") + +BUILTIN(__nvvm_rcp_rn_d, "dd", "") +BUILTIN(__nvvm_rcp_rz_d, "dd", "") +BUILTIN(__nvvm_rcp_rm_d, "dd", "") +BUILTIN(__nvvm_rcp_rp_d, "dd", "") +BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "") + +// Sqrt + +BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "") +BUILTIN(__nvvm_sqrt_rn_f, "ff", "") +BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "") +BUILTIN(__nvvm_sqrt_rz_f, "ff", "") +BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "") +BUILTIN(__nvvm_sqrt_rm_f, "ff", "") +BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "") +BUILTIN(__nvvm_sqrt_rp_f, "ff", "") +BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "") +BUILTIN(__nvvm_sqrt_approx_f, "ff", "") + +BUILTIN(__nvvm_sqrt_rn_d, "dd", "") +BUILTIN(__nvvm_sqrt_rz_d, "dd", "") +BUILTIN(__nvvm_sqrt_rm_d, "dd", "") +BUILTIN(__nvvm_sqrt_rp_d, "dd", "") + +// Rsqrt + +BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "") +BUILTIN(__nvvm_rsqrt_approx_f, "ff", "") +BUILTIN(__nvvm_rsqrt_approx_d, "dd", "") + +// Add + +BUILTIN(__nvvm_add_rn_ftz_f, "fff", "") +BUILTIN(__nvvm_add_rn_f, "fff", "") +BUILTIN(__nvvm_add_rz_ftz_f, "fff", "") +BUILTIN(__nvvm_add_rz_f, "fff", "") +BUILTIN(__nvvm_add_rm_ftz_f, "fff", "") +BUILTIN(__nvvm_add_rm_f, "fff", "") +BUILTIN(__nvvm_add_rp_ftz_f, "fff", "") +BUILTIN(__nvvm_add_rp_f, "fff", "") + +BUILTIN(__nvvm_add_rn_d, "ddd", "") +BUILTIN(__nvvm_add_rz_d, "ddd", "") +BUILTIN(__nvvm_add_rm_d, "ddd", "") +BUILTIN(__nvvm_add_rp_d, "ddd", "") + +// Convert + +BUILTIN(__nvvm_d2f_rn_ftz, "fd", "") +BUILTIN(__nvvm_d2f_rn, "fd", "") +BUILTIN(__nvvm_d2f_rz_ftz, "fd", "") +BUILTIN(__nvvm_d2f_rz, "fd", "") +BUILTIN(__nvvm_d2f_rm_ftz, "fd", "") +BUILTIN(__nvvm_d2f_rm, "fd", "") 
+BUILTIN(__nvvm_d2f_rp_ftz, "fd", "") +BUILTIN(__nvvm_d2f_rp, "fd", "") + +BUILTIN(__nvvm_d2i_rn, "id", "") +BUILTIN(__nvvm_d2i_rz, "id", "") +BUILTIN(__nvvm_d2i_rm, "id", "") +BUILTIN(__nvvm_d2i_rp, "id", "") + +BUILTIN(__nvvm_d2ui_rn, "Uid", "") +BUILTIN(__nvvm_d2ui_rz, "Uid", "") +BUILTIN(__nvvm_d2ui_rm, "Uid", "") +BUILTIN(__nvvm_d2ui_rp, "Uid", "") + +BUILTIN(__nvvm_i2d_rn, "di", "") +BUILTIN(__nvvm_i2d_rz, "di", "") +BUILTIN(__nvvm_i2d_rm, "di", "") +BUILTIN(__nvvm_i2d_rp, "di", "") + +BUILTIN(__nvvm_ui2d_rn, "dUi", "") +BUILTIN(__nvvm_ui2d_rz, "dUi", "") +BUILTIN(__nvvm_ui2d_rm, "dUi", "") +BUILTIN(__nvvm_ui2d_rp, "dUi", "") + +BUILTIN(__nvvm_f2i_rn_ftz, "if", "") +BUILTIN(__nvvm_f2i_rn, "if", "") +BUILTIN(__nvvm_f2i_rz_ftz, "if", "") +BUILTIN(__nvvm_f2i_rz, "if", "") +BUILTIN(__nvvm_f2i_rm_ftz, "if", "") +BUILTIN(__nvvm_f2i_rm, "if", "") +BUILTIN(__nvvm_f2i_rp_ftz, "if", "") +BUILTIN(__nvvm_f2i_rp, "if", "") + +BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "") +BUILTIN(__nvvm_f2ui_rn, "Uif", "") +BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "") +BUILTIN(__nvvm_f2ui_rz, "Uif", "") +BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "") +BUILTIN(__nvvm_f2ui_rm, "Uif", "") +BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "") +BUILTIN(__nvvm_f2ui_rp, "Uif", "") + +BUILTIN(__nvvm_i2f_rn, "fi", "") +BUILTIN(__nvvm_i2f_rz, "fi", "") +BUILTIN(__nvvm_i2f_rm, "fi", "") +BUILTIN(__nvvm_i2f_rp, "fi", "") + +BUILTIN(__nvvm_ui2f_rn, "fUi", "") +BUILTIN(__nvvm_ui2f_rz, "fUi", "") +BUILTIN(__nvvm_ui2f_rm, "fUi", "") +BUILTIN(__nvvm_ui2f_rp, "fUi", "") + +BUILTIN(__nvvm_lohi_i2d, "dii", "") + +BUILTIN(__nvvm_d2i_lo, "id", "") +BUILTIN(__nvvm_d2i_hi, "id", "") + +BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "") +BUILTIN(__nvvm_f2ll_rn, "LLif", "") +BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "") +BUILTIN(__nvvm_f2ll_rz, "LLif", "") +BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "") +BUILTIN(__nvvm_f2ll_rm, "LLif", "") +BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "") +BUILTIN(__nvvm_f2ll_rp, "LLif", "") + +BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "") 
+BUILTIN(__nvvm_f2ull_rn, "ULLif", "") +BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "") +BUILTIN(__nvvm_f2ull_rz, "ULLif", "") +BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "") +BUILTIN(__nvvm_f2ull_rm, "ULLif", "") +BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "") +BUILTIN(__nvvm_f2ull_rp, "ULLif", "") + +BUILTIN(__nvvm_d2ll_rn, "LLid", "") +BUILTIN(__nvvm_d2ll_rz, "LLid", "") +BUILTIN(__nvvm_d2ll_rm, "LLid", "") +BUILTIN(__nvvm_d2ll_rp, "LLid", "") + +BUILTIN(__nvvm_d2ull_rn, "ULLid", "") +BUILTIN(__nvvm_d2ull_rz, "ULLid", "") +BUILTIN(__nvvm_d2ull_rm, "ULLid", "") +BUILTIN(__nvvm_d2ull_rp, "ULLid", "") + +BUILTIN(__nvvm_ll2f_rn, "fLLi", "") +BUILTIN(__nvvm_ll2f_rz, "fLLi", "") +BUILTIN(__nvvm_ll2f_rm, "fLLi", "") +BUILTIN(__nvvm_ll2f_rp, "fLLi", "") + +BUILTIN(__nvvm_ull2f_rn, "fULLi", "") +BUILTIN(__nvvm_ull2f_rz, "fULLi", "") +BUILTIN(__nvvm_ull2f_rm, "fULLi", "") +BUILTIN(__nvvm_ull2f_rp, "fULLi", "") + +BUILTIN(__nvvm_ll2d_rn, "dLLi", "") +BUILTIN(__nvvm_ll2d_rz, "dLLi", "") +BUILTIN(__nvvm_ll2d_rm, "dLLi", "") +BUILTIN(__nvvm_ll2d_rp, "dLLi", "") + +BUILTIN(__nvvm_ull2d_rn, "dULLi", "") +BUILTIN(__nvvm_ull2d_rz, "dULLi", "") +BUILTIN(__nvvm_ull2d_rm, "dULLi", "") +BUILTIN(__nvvm_ull2d_rp, "dULLi", "") + +BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "") +BUILTIN(__nvvm_f2h_rn, "Usf", "") + +BUILTIN(__nvvm_h2f, "fUs", "") + +// Bitcast + +BUILTIN(__nvvm_bitcast_f2i, "if", "") +BUILTIN(__nvvm_bitcast_i2f, "fi", "") + +BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "") +BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") + +// Sync + +BUILTIN(__syncthreads, "v", "") +BUILTIN(__nvvm_bar0, "v", "") +BUILTIN(__nvvm_bar0_popc, "ii", "") +BUILTIN(__nvvm_bar0_and, "ii", "") +BUILTIN(__nvvm_bar0_or, "ii", "") + +// Membar + +BUILTIN(__nvvm_membar_cta, "v", "") +BUILTIN(__nvvm_membar_gl, "v", "") +BUILTIN(__nvvm_membar_sys, "v", "") + +// Memcpy, Memset + +BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","") +BUILTIN(__nvvm_memset, "vUc*Uczi","") + +// Image + +BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "") 
+BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "") +BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "") +BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "") + +BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "") +BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "") +BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "") +BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "") + +BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "") +BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "") +BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "") +BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "") +BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "") +BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "") +BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "") +BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "") + +// Atomic +// +// We need the atom intrinsics because +// - they are used in converging analysis +// - they are used in address space analysis and optimization +// So it does not hurt to expose them as builtins. 
+// +BUILTIN(__nvvm_atom_add_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_add_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_add_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_add_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_add_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_add_s_ll, "LLiLLiD*3LLi", "n") +BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n") +BUILTIN(__nvvm_atom_add_g_f, "ffD*1f", "n") +BUILTIN(__nvvm_atom_add_s_f, "ffD*3f", "n") +BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n") + +BUILTIN(__nvvm_atom_sub_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_sub_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_sub_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_sub_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_sub_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_sub_s_ll, "LLiLLiD*3LLi", "n") +BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n") + +BUILTIN(__nvvm_atom_xchg_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_xchg_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_xchg_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_xchg_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_xchg_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_xchg_s_ll, "LLiLLiD*3LLi", "n") +BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n") + +BUILTIN(__nvvm_atom_max_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_max_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_max_g_ui, "UiUiD*1Ui", "n") +BUILTIN(__nvvm_atom_max_s_ui, "UiUiD*3Ui", "n") +BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n") +BUILTIN(__nvvm_atom_max_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_max_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_max_g_ul, "ULiULiD*1ULi", "n") +BUILTIN(__nvvm_atom_max_s_ul, "ULiULiD*3ULi", 
"n") +BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n") +BUILTIN(__nvvm_atom_max_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_max_s_ll, "LLiLLiD*3LLi", "n") +BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n") +BUILTIN(__nvvm_atom_max_g_ull, "ULLiULLiD*1ULLi", "n") +BUILTIN(__nvvm_atom_max_s_ull, "ULLiULLiD*3ULLi", "n") +BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n") + +BUILTIN(__nvvm_atom_min_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_min_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_min_g_ui, "UiUiD*1Ui", "n") +BUILTIN(__nvvm_atom_min_s_ui, "UiUiD*3Ui", "n") +BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n") +BUILTIN(__nvvm_atom_min_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_min_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_min_g_ul, "ULiULiD*1ULi", "n") +BUILTIN(__nvvm_atom_min_s_ul, "ULiULiD*3ULi", "n") +BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n") +BUILTIN(__nvvm_atom_min_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_min_s_ll, "LLiLLiD*3LLi", "n") +BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n") +BUILTIN(__nvvm_atom_min_g_ull, "ULLiULLiD*1ULLi", "n") +BUILTIN(__nvvm_atom_min_s_ull, "ULLiULLiD*3ULLi", "n") +BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n") + +BUILTIN(__nvvm_atom_inc_g_ui, "UiUiD*1Ui", "n") +BUILTIN(__nvvm_atom_inc_s_ui, "UiUiD*3Ui", "n") +BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n") +BUILTIN(__nvvm_atom_dec_g_ui, "UiUiD*1Ui", "n") +BUILTIN(__nvvm_atom_dec_s_ui, "UiUiD*3Ui", "n") +BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n") + +BUILTIN(__nvvm_atom_and_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_and_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_and_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_and_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_and_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_and_s_ll, "LLiLLiD*3LLi", "n") 
+BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n") + +BUILTIN(__nvvm_atom_or_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_or_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_or_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_or_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_or_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_or_s_ll, "LLiLLiD*3LLi", "n") +BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n") + +BUILTIN(__nvvm_atom_xor_g_i, "iiD*1i", "n") +BUILTIN(__nvvm_atom_xor_s_i, "iiD*3i", "n") +BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n") +BUILTIN(__nvvm_atom_xor_g_l, "LiLiD*1Li", "n") +BUILTIN(__nvvm_atom_xor_s_l, "LiLiD*3Li", "n") +BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n") +BUILTIN(__nvvm_atom_xor_g_ll, "LLiLLiD*1LLi", "n") +BUILTIN(__nvvm_atom_xor_s_ll, "LLiLLiD*3LLi", "n") +BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") + +BUILTIN(__nvvm_atom_cas_g_i, "iiD*1ii", "n") +BUILTIN(__nvvm_atom_cas_s_i, "iiD*3ii", "n") +BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") +BUILTIN(__nvvm_atom_cas_g_l, "LiLiD*1LiLi", "n") +BUILTIN(__nvvm_atom_cas_s_l, "LiLiD*3LiLi", "n") +BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n") +BUILTIN(__nvvm_atom_cas_g_ll, "LLiLLiD*1LLiLLi", "n") +BUILTIN(__nvvm_atom_cas_s_ll, "LLiLLiD*3LLiLLi", "n") +BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n") + +// Compiler Error Warn +BUILTIN(__nvvm_compiler_error, "vcC*4", "n") +BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") #undef BUILTIN diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 2c7e0c136769..7deee8ef48eb 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -165,4 +165,13 @@ void nvvm_math(float f1, float f2, double d1, double d2) { double td3 = __nvvm_sqrt_rn_d(d1); // CHECK: call double @llvm.nvvm.rcp.rn.d double td4 = __nvvm_rcp_rn_d(d2); + +// CHECK: call void @llvm.nvvm.membar.cta() + __nvvm_membar_cta(); +// CHECK: 
call void @llvm.nvvm.membar.gl() + __nvvm_membar_gl(); +// CHECK: call void @llvm.nvvm.membar.sys() + __nvvm_membar_sys(); +// CHECK: call void @llvm.nvvm.barrier0() + __nvvm_bar0(); }