Add _mm_stream_si64 intrinsic.
While I'm here, also fix the alignment computation for the whole family of intrinsics. PR17298. llvm-svn: 191243
This commit is contained in:
parent
05ccc50ba4
commit
f9d8c6cebb
|
@ -258,6 +258,7 @@ BUILTIN(__builtin_ia32_storeupd, "vd*V2d", "")
|
|||
BUILTIN(__builtin_ia32_movmskpd, "iV2d", "")
|
||||
BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "")
|
||||
BUILTIN(__builtin_ia32_movnti, "vi*i", "")
|
||||
BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "")
|
||||
BUILTIN(__builtin_ia32_movntpd, "vd*V2d", "")
|
||||
BUILTIN(__builtin_ia32_movntdq, "vV2LLi*V2LLi", "")
|
||||
BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "")
|
||||
|
|
|
@ -3249,7 +3249,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
case X86::BI__builtin_ia32_movntpd256:
|
||||
case X86::BI__builtin_ia32_movntdq:
|
||||
case X86::BI__builtin_ia32_movntdq256:
|
||||
case X86::BI__builtin_ia32_movnti: {
|
||||
case X86::BI__builtin_ia32_movnti:
|
||||
case X86::BI__builtin_ia32_movnti64: {
|
||||
llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
|
||||
Builder.getInt32(1));
|
||||
|
||||
|
@ -3259,7 +3260,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
"cast");
|
||||
StoreInst *SI = Builder.CreateStore(Ops[1], BC);
|
||||
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
|
||||
SI->setAlignment(16);
|
||||
|
||||
// If the operand is an integer, we can't assume alignment. Otherwise,
|
||||
// assume natural alignment.
|
||||
QualType ArgTy = E->getArg(1)->getType();
|
||||
unsigned Align;
|
||||
if (ArgTy->isIntegerType())
|
||||
Align = 1;
|
||||
else
|
||||
Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
|
||||
SI->setAlignment(Align);
|
||||
return SI;
|
||||
}
|
||||
// 3DNow!
|
||||
|
|
|
@ -1214,6 +1214,14 @@ _mm_stream_si32(int *__p, int __a)
|
|||
__builtin_ia32_movnti(__p, __a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Stores the 64-bit integer __a to the location __p by forwarding both
   arguments to __builtin_ia32_movnti64, which is emitted as a store tagged
   with "nontemporal" metadata. Only declared when __x86_64__ is defined. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_stream_si64(long long *__p, long long __a)
|
||||
{
|
||||
__builtin_ia32_movnti64(__p, __a);
|
||||
}
|
||||
#endif
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_clflush(void const *__p)
|
||||
{
|
||||
|
|
|
@ -55,6 +55,7 @@ void f0() {
|
|||
const float* tmp_fCp;
|
||||
double* tmp_dp;
|
||||
const double* tmp_dCp;
|
||||
long long* tmp_LLip;
|
||||
|
||||
#define imm_i 32
|
||||
#define imm_i_0_2 0
|
||||
|
@ -288,6 +289,9 @@ void f0() {
|
|||
tmp_i = __builtin_ia32_movmskpd(tmp_V2d);
|
||||
tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c);
|
||||
(void) __builtin_ia32_movnti(tmp_ip, tmp_i);
|
||||
#ifdef USE_64
|
||||
(void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi);
|
||||
#endif
|
||||
(void) __builtin_ia32_movntpd(tmp_dp, tmp_V2d);
|
||||
(void) __builtin_ia32_movntdq(tmp_V2LLip, tmp_V2LLi);
|
||||
tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
@ -188,3 +188,21 @@ void test_storel_epi64(__m128i x, void* y) {
|
|||
// CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
|
||||
_mm_storel_epi64(y, x);
|
||||
}
|
||||
|
||||
// Calls _mm_stream_si32 with a void* destination; the FileCheck patterns in
// the body expect a nontemporal i32 store with alignment 1.
void test_stream_si32(int x, void *y) {
|
||||
// CHECK-LABEL: define void @test_stream_si32
|
||||
// CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal
|
||||
_mm_stream_si32(y, x);
|
||||
}
|
||||
|
||||
// Calls _mm_stream_si64 with a void* destination; the FileCheck patterns in
// the body expect a nontemporal i64 store with alignment 1.
void test_stream_si64(long long x, void *y) {
|
||||
// CHECK-LABEL: define void @test_stream_si64
|
||||
// CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal
|
||||
_mm_stream_si64(y, x);
|
||||
}
|
||||
|
||||
// Calls _mm_stream_si128 with a void* destination; the FileCheck patterns in
// the body expect a nontemporal <2 x i64> store with alignment 16.
void test_stream_si128(__m128i x, void *y) {
|
||||
// CHECK-LABEL: define void @test_stream_si128
|
||||
// CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal
|
||||
_mm_stream_si128(y, x);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue