diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 0216252c19b3..a7edcc848b0a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -122,6 +122,8 @@ def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">; +def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", + "Support PRFCHW instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td index bb362f5c7bb1..ba1aede3c1a0 100644 --- a/llvm/lib/Target/X86/X86Instr3DNow.td +++ b/llvm/lib/Target/X86/X86Instr3DNow.td @@ -84,13 +84,16 @@ defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">; defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; -def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; +def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", + [(int_x86_mmx_femms)]>; -def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), - "prefetch\t$addr", []>; +def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr), + "prefetch\t$addr", + [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>; -def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), - "prefetchw\t$addr", []>; +def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", + [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB, + Requires<[HasPrefetchW]>; // "3DNowA" instructions defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 39165e24a872..1add80b5edf9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -604,6 +604,8 @@ def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; def HasADX : Predicate<"Subtarget->hasADX()">; +def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; +def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 0f2c008ab965..1a7c2c29eb86 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -283,6 +283,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasLZCNT = true; ToggleFeature(X86::FeatureLZCNT); } + if (IsIntel && ((ECX >> 8) & 0x1)) { + HasPRFCHW = true; + ToggleFeature(X86::FeaturePRFCHW); + } if (IsAMD) { if ((ECX >> 6) & 0x1) { HasSSE4A = true; @@ -440,6 +444,7 @@ void X86Subtarget::initializeEnvironment() { HasBMI2 = false; HasRTM = false; HasADX = false; + HasPRFCHW = false; IsBTMemSlow = false; IsUAMemFast = false; HasVectorUAMem = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index e97da4b6f4f2..b9f29fdcee03 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -124,6 +124,9 @@ protected: /// HasADX - Processor has ADX instructions. bool HasADX; + /// HasPRFCHW - Processor has PRFCHW instructions. + bool HasPRFCHW; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -254,6 +257,7 @@ public: bool hasBMI2() const { return HasBMI2; } bool hasRTM() const { return HasRTM; } bool hasADX() const { return HasADX; } + bool hasPRFCHW() const { return HasPRFCHW; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll index ec2f302b1499..12434a9a5297 100644 --- a/llvm/test/CodeGen/X86/prefetch.ll +++ b/llvm/test/CodeGen/X86/prefetch.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW ; rdar://10538297 @@ -9,10 +10,12 @@ entry: ; CHECK: prefetcht1 ; CHECK: prefetcht0 ; CHECK: prefetchnta +; PRFCHW: prefetchw tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 ) ret void }