[OpenCL] Use long instead of long long in x86 builtins

Summary: According to the C99 standard, long long is at least 64 bits in
size. However, OpenCL C defines long long as a 128-bit signed
integer. This prevents the use of x86 builtins when compiling OpenCL C
code for x86 targets. The patch changes long long to long for OpenCL
only.

Patch by: Alexander Batashev <alexander.batashev@intel.com>

Reviewers: craig.topper, Ka-Ka, eandrews, erichkeane, Anastasia

Reviewed By: Ka-Ka, erichkeane, Anastasia

Subscribers: a.elovikov, yaxunl, Anastasia, cfe-commits, ivankara, etyurin, asavonic

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62580

llvm-svn: 362391
This commit is contained in:
Andrew Savonichev 2019-06-03 12:34:59 +00:00
parent edfa756f3f
commit fa8cd7691a
5 changed files with 532 additions and 484 deletions

View File

@ -53,6 +53,7 @@
// Z -> int32_t (require a native 32-bit integer type on the target)
// W -> int64_t (require a native 64-bit integer type on the target)
// N -> 'int' size if target is LP64, 'L' otherwise.
// O -> long when compiling OpenCL, long long otherwise.
// S -> signed
// U -> unsigned
// I -> Required to constant fold to an integer constant expression.

File diff suppressed because it is too large Load Diff

View File

@ -43,65 +43,65 @@ TARGET_HEADER_BUILTIN(_InterlockedOr64, "LLiLLiD*LLi", "nh", "intrin.h"
TARGET_HEADER_BUILTIN(_InterlockedXor64, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "cx16")
TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "ULLi", "n", "")
TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vULLi", "n", "")
TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "n", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "ncV:128:", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "nc", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "UOi", "n", "")
TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vUOi", "n", "")
TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "OiV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "OiV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "OiV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "OiV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_rdfsbase64, "ULLi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_rdfsbase64, "UOi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_rdgsbase32, "Ui", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_rdgsbase64, "ULLi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_rdgsbase64, "UOi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_wrfsbase32, "vUi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_wrfsbase64, "vULLi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_wrfsbase64, "vUOi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_wrgsbase32, "vUi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_wrgsbase64, "vULLi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_wrgsbase64, "vUOi", "n", "fsgsbase")
TARGET_BUILTIN(__builtin_ia32_fxrstor64, "vv*", "n", "fxsr")
TARGET_BUILTIN(__builtin_ia32_fxsave64, "vv*", "n", "fxsr")
TARGET_BUILTIN(__builtin_ia32_xsave64, "vv*ULLi", "n", "xsave")
TARGET_BUILTIN(__builtin_ia32_xrstor64, "vv*ULLi", "n", "xsave")
TARGET_BUILTIN(__builtin_ia32_xsaveopt64, "vv*ULLi", "n", "xsaveopt")
TARGET_BUILTIN(__builtin_ia32_xrstors64, "vv*ULLi", "n", "xsaves")
TARGET_BUILTIN(__builtin_ia32_xsavec64, "vv*ULLi", "n", "xsavec")
TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*ULLi", "n", "xsaves")
TARGET_BUILTIN(__builtin_ia32_incsspq, "vULLi", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_rdsspq, "ULLiULLi", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_wrssq, "vULLiv*", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_wrussq, "vULLiv*", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcULLiULLiULLi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiULLi*", "n", "rdrnd")
TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiULLi*", "n", "rdseed")
TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "ULLiULLi", "nc", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_bextr_u64, "ULLiULLiULLi", "nc", "bmi")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "ULLiULLi", "nc", "")
TARGET_BUILTIN(__builtin_ia32_bzhi_di, "ULLiULLiULLi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_di, "ULLiULLiULLi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "nc", "tbm")
TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "n", "lwp")
TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "n", "lwp")
TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_xsave64, "vv*UOi", "n", "xsave")
TARGET_BUILTIN(__builtin_ia32_xrstor64, "vv*UOi", "n", "xsave")
TARGET_BUILTIN(__builtin_ia32_xsaveopt64, "vv*UOi", "n", "xsaveopt")
TARGET_BUILTIN(__builtin_ia32_xrstors64, "vv*UOi", "n", "xsaves")
TARGET_BUILTIN(__builtin_ia32_xsavec64, "vv*UOi", "n", "xsavec")
TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*UOi", "n", "xsaves")
TARGET_BUILTIN(__builtin_ia32_incsspq, "vUOi", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_rdsspq, "UOiUOi", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_wrssq, "vUOiv*", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_wrussq, "vUOiv*", "n", "shstk")
TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiUOi*", "n", "rdrnd")
TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiUOi*", "n", "rdseed")
TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "nc", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "nc", "bmi")
TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "nc", "")
TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_bextri_u64, "UOiUOiIUOi", "nc", "tbm")
TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcUOiUiUi", "n", "lwp")
TARGET_BUILTIN(__builtin_ia32_lwpval64, "vUOiUiUi", "n", "lwp")
TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "OiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "UOiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "OiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "UOiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "OiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "UOiV2dIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "OiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "UOiV4fIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dUOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vULLi", "n", "ptwrite")
TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vUOi", "n", "ptwrite")
#undef BUILTIN
#undef TARGET_BUILTIN

View File

@ -9282,13 +9282,13 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
Unsigned = true;
break;
case 'L':
assert(!IsSpecial && "Can't use 'L' with 'W', 'N' or 'Z' modifiers");
assert(!IsSpecial && "Can't use 'L' with 'W', 'N', 'Z' or 'O' modifiers");
assert(HowLong <= 2 && "Can't have LLLL modifier");
++HowLong;
break;
case 'N':
// 'N' behaves like 'L' for all non LP64 targets and 'int' otherwise.
assert(!IsSpecial && "Can't use two 'N', 'W' or 'Z' modifiers!");
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'N' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
@ -9298,7 +9298,7 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
break;
case 'W':
// This modifier represents int64 type.
assert(!IsSpecial && "Can't use two 'N', 'W' or 'Z' modifiers!");
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'W' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
@ -9316,7 +9316,7 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
break;
case 'Z':
// This modifier represents int32 type.
assert(!IsSpecial && "Can't use two 'N', 'W' or 'Z' modifiers!");
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'Z' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
@ -9335,6 +9335,17 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
break;
}
break;
case 'O':
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'O' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
if (Context.getLangOpts().OpenCL)
HowLong = 1;
else
HowLong = 2;
break;
}
}

View File

@ -1,5 +1,6 @@
// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s
// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +shstk -target-feature +clzero -target-feature +wbnoinvd -target-feature +cldemote -fsyntax-only -o %t %s
// RUN: %clang_cc1 -DUSE_64 -DOPENCL -x cl -cl-std=CL2.0 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s
#ifdef USE_ALL
#define USE_3DNOW
@ -11,7 +12,11 @@
typedef char V8c __attribute__((vector_size(8 * sizeof(char))));
typedef signed short V4s __attribute__((vector_size(8)));
typedef signed int V2i __attribute__((vector_size(8)));
#ifndef OPENCL
typedef signed long long V1LLi __attribute__((vector_size(8)));
#else
typedef signed long V1LLi __attribute__((vector_size(8)));
#endif
typedef float V2f __attribute__((vector_size(8)));
@ -19,7 +24,11 @@ typedef float V2f __attribute__((vector_size(8)));
typedef char V16c __attribute__((vector_size(16)));
typedef signed short V8s __attribute__((vector_size(16)));
typedef signed int V4i __attribute__((vector_size(16)));
#ifndef OPENCL
typedef signed long long V2LLi __attribute__((vector_size(16)));
#else
typedef signed long V2LLi __attribute__((vector_size(16)));
#endif
typedef float V4f __attribute__((vector_size(16)));
typedef double V2d __attribute__((vector_size(16)));
@ -27,7 +36,11 @@ typedef double V2d __attribute__((vector_size(16)));
// 256-bit
typedef char V32c __attribute__((vector_size(32)));
typedef signed int V8i __attribute__((vector_size(32)));
#ifndef OPENCL
typedef signed long long V4LLi __attribute__((vector_size(32)));
#else
typedef signed long V4LLi __attribute__((vector_size(32)));
#endif
typedef double V4d __attribute__((vector_size(32)));
typedef float V8f __attribute__((vector_size(32)));
@ -41,8 +54,13 @@ void f0() {
#endif
signed int tmp_i;
unsigned int tmp_Ui;
#ifndef OPENCL
signed long long tmp_LLi;
unsigned long long tmp_ULLi;
#else
signed long tmp_LLi;
unsigned long tmp_ULLi;
#endif
float tmp_f;
double tmp_d;
@ -55,7 +73,11 @@ void f0() {
const float* tmp_fCp;
double* tmp_dp;
const double* tmp_dCp;
#ifndef OPENCL
long long* tmp_LLip;
#else
long* tmp_LLip;
#endif
#define imm_i 32
#define imm_i_0_2 0
@ -102,8 +124,8 @@ void f0() {
const V4d* tmp_V4dCp;
const V8f* tmp_V8fCp;
tmp_V2LLi = __builtin_ia32_undef128();
tmp_V4LLi = __builtin_ia32_undef256();
tmp_V2d = __builtin_ia32_undef128();
tmp_V4d = __builtin_ia32_undef256();
tmp_i = __builtin_ia32_comieq(tmp_V4f, tmp_V4f);
tmp_i = __builtin_ia32_comilt(tmp_V4f, tmp_V4f);
@ -203,9 +225,9 @@ void f0() {
tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);
tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s);
tmp_V8s = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
tmp_V4i = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
tmp_V8s = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
tmp_V8s = __builtin_ia32_pmulhuw128(tmp_V8s, tmp_V8s);
tmp_V4f = __builtin_ia32_addsubps(tmp_V4f, tmp_V4f);
tmp_V2d = __builtin_ia32_addsubpd(tmp_V2d, tmp_V2d);
@ -225,7 +247,7 @@ void f0() {
tmp_V2i = __builtin_ia32_phsubd(tmp_V2i, tmp_V2i);
tmp_V8s = __builtin_ia32_phsubsw128(tmp_V8s, tmp_V8s);
tmp_V4s = __builtin_ia32_phsubsw(tmp_V4s, tmp_V4s);
tmp_V16c = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c);
tmp_V8s = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c);
tmp_V8c = __builtin_ia32_pmaddubsw(tmp_V8c, tmp_V8c);
tmp_V8s = __builtin_ia32_pmulhrsw128(tmp_V8s, tmp_V8s);
tmp_V4s = __builtin_ia32_pmulhrsw(tmp_V4s, tmp_V4s);
@ -271,9 +293,13 @@ void f0() {
__builtin_ia32_clrssbsy(tmp_vp);
(void) __builtin_ia32_ldmxcsr(tmp_Ui);
#ifndef OPENCL
(void) _mm_setcsr(tmp_Ui);
#endif
tmp_Ui = __builtin_ia32_stmxcsr();
#ifndef OPENCL
tmp_Ui = _mm_getcsr();
#endif
(void)__builtin_ia32_fxsave(tmp_vp);
(void)__builtin_ia32_fxsave64(tmp_vp);
(void)__builtin_ia32_fxrstor(tmp_vp);
@ -321,7 +347,9 @@ void f0() {
tmp_i = __builtin_ia32_pmovmskb(tmp_V8c);
(void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi);
(void) __builtin_ia32_sfence();
#ifndef OPENCL
(void) _mm_sfence();
#endif
tmp_V4s = __builtin_ia32_psadbw(tmp_V8c, tmp_V8c);
tmp_V4f = __builtin_ia32_rcpps(tmp_V4f);
@ -356,13 +384,21 @@ void f0() {
tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
(void) __builtin_ia32_clflush(tmp_vCp);
#ifndef OPENCL
(void) _mm_clflush(tmp_vCp);
#endif
(void) __builtin_ia32_lfence();
#ifndef OPENCL
(void) _mm_lfence();
#endif
(void) __builtin_ia32_mfence();
#ifndef OPENCL
(void) _mm_mfence();
#endif
(void) __builtin_ia32_pause();
#ifndef OPENCL
(void) _mm_pause();
#endif
tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, tmp_i);
tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, tmp_i);
tmp_V1LLi = __builtin_ia32_psllqi(tmp_V1LLi, tmp_i);
@ -389,12 +425,12 @@ void f0() {
tmp_V2LLi = __builtin_ia32_psrlqi128(tmp_V2LLi, tmp_i);
tmp_V8s = __builtin_ia32_psrawi128(tmp_V8s, tmp_i);
tmp_V4i = __builtin_ia32_psradi128(tmp_V4i, tmp_i);
tmp_V8s = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s);
tmp_V4i = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s);
(void) __builtin_ia32_monitor(tmp_vp, tmp_Ui, tmp_Ui);
(void) __builtin_ia32_mwait(tmp_Ui, tmp_Ui);
tmp_V16c = __builtin_ia32_lddqu(tmp_cCp);
tmp_V2LLi = __builtin_ia32_palignr128(tmp_V2LLi, tmp_V2LLi, imm_i);
tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
tmp_V16c = __builtin_ia32_palignr128(tmp_V16c, tmp_V16c, imm_i);
tmp_V8c = __builtin_ia32_palignr(tmp_V8c, tmp_V8c, imm_i);
#ifdef USE_SSE4
tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);