Nehalem unaligned memory access is fast.

llvm-svn: 100089
This commit is contained in:
Evan Cheng 2010-04-01 05:58:17 +00:00
parent c0f5ce32d5
commit 738b0f9ec7
3 changed files with 15 additions and 2 deletions

View File

@ -59,6 +59,9 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
[FeatureCMOV]>; [FeatureCMOV]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">; "Bit testing of memory is slow">;
// Subtarget feature "fast-unaligned-mem": marks processors on which unaligned
// memory access is fast by setting the IsUAMemFast subtarget field to true.
def FeatureFastUAMem
    : SubtargetFeature<"fast-unaligned-mem", "IsUAMemFast", "true",
                       "Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions">; "Support SSE 4a instructions">;
@ -98,8 +101,10 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; FeatureFastUAMem]>;
// Nehalem: SSE4.2, 64-bit, slow BT-with-memory, and fast unaligned memory
// access.
def : Proc<"nehalem",
           [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, FeatureFastUAMem]>;
// Sandy Bridge does not have FMA // Sandy Bridge does not have FMA
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;

View File

@ -266,6 +266,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned Model = 0; unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model); DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
// If it's Nehalem, unaligned memory access is fast. Nehalem reports CPUID
// family 6, model 26 (0x1A); family 15 is NetBurst (Pentium 4), so checking
// for family 15 here would never match a Nehalem part.
// NOTE(review): assumes DetectFamilyModel folds the extended-model bits into
// Model for family 6 -- confirm against its definition.
if (Family == 6 && Model == 26)
  IsUAMemFast = true;
GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1; HasX86_64 = (EDX >> 29) & 0x1;
@ -286,6 +289,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false) , HasFMA3(false)
, HasFMA4(false) , HasFMA4(false)
, IsBTMemSlow(false) , IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false) , HasVectorUAMem(false)
, DarwinVers(0) , DarwinVers(0)
, stackAlignment(8) , stackAlignment(8)

View File

@ -78,6 +78,9 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow; bool IsBTMemSlow;
/// IsUAMemFast - True if unaligned memory access is fast. Exposed to clients
/// through isUnalignedMemAccessFast().
bool IsUAMemFast;
/// HasVectorUAMem - True if SIMD operations can have unaligned memory /// HasVectorUAMem - True if SIMD operations can have unaligned memory
/// operands. This may require setting a feature bit in the /// operands. This may require setting a feature bit in the
/// processor. /// processor.
@ -148,6 +151,7 @@ public:
bool hasFMA3() const { return HasFMA3; } bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; } bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; } bool isBTMemSlow() const { return IsBTMemSlow; }
/// isUnalignedMemAccessFast - Return the IsUAMemFast flag, i.e. whether
/// unaligned memory access is considered fast on this subtarget.
bool isUnalignedMemAccessFast() const {
  return IsUAMemFast;
}
bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetDarwin() const { return TargetType == isDarwin; }