From 4c91aa3418f7e0ea2976d78ff552a6ba943ed165 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 2 Jan 2009 05:35:45 +0000 Subject: [PATCH] Do not isel load folding bt instructions for pentium m, core, core2, and AMD processors. These are significantly slower than a load followed by a bt of a register. llvm-svn: 61557 --- llvm/lib/Target/X86/X86.td | 34 +++++++++++++------------- llvm/lib/Target/X86/X86InstrInfo.td | 5 ++-- llvm/lib/Target/X86/X86Subtarget.cpp | 36 +++++++++++++++++++--------- llvm/lib/Target/X86/X86Subtarget.h | 5 ++++ llvm/test/CodeGen/X86/bt.ll | 2 ++ 5 files changed, 53 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 6d08b36be44d..8867298abb81 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -48,6 +48,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions", [FeatureSSE2]>; +def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", + "Bit testing of memory is slow">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -66,27 +68,27 @@ def : Proc<"i686", []>; def : Proc<"pentiumpro", []>; def : Proc<"pentium2", [FeatureMMX]>; def : Proc<"pentium3", [FeatureSSE1]>; -def : Proc<"pentium-m", [FeatureSSE2]>; +def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; -def : Proc<"x86-64", [Feature64Bit]>; -def : Proc<"yonah", [FeatureSSE3]>; -def : Proc<"prescott", [FeatureSSE3]>; -def : Proc<"nocona", [FeatureSSE3, Feature64Bit]>; -def : Proc<"core2", [FeatureSSSE3, Feature64Bit]>; -def : Proc<"penryn", [FeatureSSE41, Feature64Bit]>; +def : Proc<"x86-64", [Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; +def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; +def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>; -def : Proc<"athlon", [FeatureMMX, Feature3DNowA]>; -def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA]>; -def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA]>; -def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA]>; -def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA]>; -def : Proc<"k8", [Feature3DNowA, Feature64Bit]>; -def : Proc<"opteron", [Feature3DNowA, Feature64Bit]>; -def : Proc<"athlon64", [Feature3DNowA, Feature64Bit]>; -def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit]>; +def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"k8", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"opteron", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"athlon64", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index be36cba5e4d5..b00ca6475426 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -222,6 +222,7 @@ def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">; def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; def OptForSpeed : Predicate<"!OptForSize">; +def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. @@ -2666,11 +2667,11 @@ def BT32rr : I<0xA3, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(X86bt (loadi16 addr:$src1), GR16:$src2), - (implicit EFLAGS)]>, OpSize, TB; + (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>; def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(X86bt (loadi32 addr:$src1), GR32:$src2), - (implicit EFLAGS)]>, TB; + (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>; } // Defs = [EFLAGS] // Sign/Zero extenders diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 106ce46525c5..33a7b4534644 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -149,6 +149,18 @@ bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, return true; } +static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) { + Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (Family == 6 || Family == 0xf) { + if (Family == 0xf) + // Examine extended family ID if family ID is F. + Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; union { @@ -169,8 +181,15 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((ECX >> 19) & 0x1) X86SSELevel = SSE41; if ((ECX >> 20) & 0x1) X86SSELevel = SSE42; - if (memcmp(text.c, "GenuineIntel", 12) == 0 || - memcmp(text.c, "AuthenticAMD", 12) == 0) { + bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; + bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; + if (IsIntel || IsAMD) { + // Determine if bit test memory instructions are slow. + unsigned Family = 0; + unsigned Model = 0; + DetectFamilyModel(EAX, Family, Model); + IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); + X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; } @@ -180,15 +199,9 @@ static const char *GetCurrentX86CPU() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) return "generic"; - unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11 - unsigned Model = (EAX >> 4) & 0xf; // Bits 4 - 7 - if (Family == 6 || Family == 0xf) { - if (Family == 0xf) - // Examine extended family ID if family ID is F. - Family += (EAX >> 20) & 0xff; // Bits 20 - 27 - // Examine extended model ID if family ID is 6 or F. - Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 - } + unsigned Family = 0; + unsigned Model = 0; + DetectFamilyModel(EAX, Family, Model); X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; @@ -285,6 +298,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) , HasX86_64(false) + , IsBTMemSlow(false) , DarwinVers(0) , IsLinux(false) , stackAlignment(8) diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index f405ac798bb6..646a953370fe 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -64,6 +64,9 @@ protected: /// HasX86_64 - True if the processor supports X86-64 instructions. /// bool HasX86_64; + + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. + bool IsBTMemSlow; /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. @@ -127,6 +130,8 @@ public: bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } + bool isBTMemSlow() const { return IsBTMemSlow; } + unsigned getAsmFlavor() const { return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0; } diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll index bc77a58ce2ba..b63a3f8ecf77 100644 --- a/llvm/test/CodeGen/X86/bt.ll +++ b/llvm/test/CodeGen/X86/bt.ll @@ -1,4 +1,6 @@ ; RUN: llvm-as < %s | llc | grep btl +; RUN: llvm-as < %s | llc -mcpu=pentium4 | grep btl | grep esp +; RUN: llvm-as < %s | llc -mcpu=penryn | grep btl | not grep esp ; PR3253 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8"