Add _mm_stream_si64 intrinsic.

While I'm here, also fix the alignment computation for the whole family of
intrinsics.

PR17298.

llvm-svn: 191243
This commit is contained in:
Eli Friedman 2013-09-23 23:38:39 +00:00
parent 05ccc50ba4
commit f9d8c6cebb
5 changed files with 44 additions and 3 deletions

View File

@ -258,6 +258,7 @@ BUILTIN(__builtin_ia32_storeupd, "vd*V2d", "")
BUILTIN(__builtin_ia32_movmskpd, "iV2d", "")
BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "")
BUILTIN(__builtin_ia32_movnti, "vi*i", "")
BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "")
BUILTIN(__builtin_ia32_movntpd, "vd*V2d", "")
BUILTIN(__builtin_ia32_movntdq, "vV2LLi*V2LLi", "")
BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "")

View File

@ -3249,7 +3249,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_movntpd256:
case X86::BI__builtin_ia32_movntdq:
case X86::BI__builtin_ia32_movntdq256:
case X86::BI__builtin_ia32_movnti: {
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64: {
llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
Builder.getInt32(1));
@ -3259,7 +3260,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
"cast");
StoreInst *SI = Builder.CreateStore(Ops[1], BC);
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
SI->setAlignment(16);
// If the operand is an integer, we can't assume alignment. Otherwise,
// assume natural alignment.
QualType ArgTy = E->getArg(1)->getType();
unsigned Align;
if (ArgTy->isIntegerType())
Align = 1;
else
Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
SI->setAlignment(Align);
return SI;
}
// 3DNow!

View File

@ -1214,6 +1214,14 @@ _mm_stream_si32(int *__p, int __a)
__builtin_ia32_movnti(__p, __a);
}
#ifdef __x86_64__
/* Stores the 64-bit integer __a to the location __p by forwarding both
 * arguments to __builtin_ia32_movnti64 (the 64-bit counterpart of the
 * __builtin_ia32_movnti call used by _mm_stream_si32). This wrapper is only
 * declared when __x86_64__ is defined. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si64(long long *__p, long long __a)
{
__builtin_ia32_movnti64(__p, __a);
}
#endif
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p)
{

View File

@ -55,6 +55,7 @@ void f0() {
const float* tmp_fCp;
double* tmp_dp;
const double* tmp_dCp;
long long* tmp_LLip;
#define imm_i 32
#define imm_i_0_2 0
@ -288,6 +289,9 @@ void f0() {
tmp_i = __builtin_ia32_movmskpd(tmp_V2d);
tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c);
(void) __builtin_ia32_movnti(tmp_ip, tmp_i);
#ifdef USE_64
(void) __builtin_ia32_movnti64(tmp_LLip, tmp_LLi);
#endif
(void) __builtin_ia32_movntpd(tmp_dp, tmp_V2d);
(void) __builtin_ia32_movntdq(tmp_V2LLip, tmp_V2LLi);
tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);

View File

@ -1,4 +1,4 @@
// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
#include <xmmintrin.h>
#include <emmintrin.h>
@ -188,3 +188,21 @@ void test_storel_epi64(__m128i x, void* y) {
// CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
_mm_storel_epi64(y, x);
}
// _mm_stream_si32: the emitted IR must contain an i32 store tagged
// !nontemporal whose alignment is 1, i.e. no alignment is assumed for the
// destination pointer.
void test_stream_si32(int x, void *y) {
// CHECK-LABEL: define void @test_stream_si32
// CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal
_mm_stream_si32(y, x);
}
// _mm_stream_si64: the emitted IR must contain an i64 store tagged
// !nontemporal whose alignment is 1, i.e. no alignment is assumed for the
// destination pointer.
void test_stream_si64(long long x, void *y) {
// CHECK-LABEL: define void @test_stream_si64
// CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal
_mm_stream_si64(y, x);
}
// _mm_stream_si128: the emitted IR must contain a <2 x i64> store tagged
// !nontemporal with 16-byte alignment, in contrast to the scalar integer
// variants, which are checked for align 1.
void test_stream_si128(__m128i x, void *y) {
// CHECK-LABEL: define void @test_stream_si128
// CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal
_mm_stream_si128(y, x);
}