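Disable the vzeroupper insertion pass on PS4.
This adds a `fast-partial-ymm-write` subtarget feature, enables it for the btver2 processor model, and makes the vzeroupper insertion pass return early when that feature is set.
Differential Revision: http://reviews.llvm.org/D16837
llvm-svn: 260764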
This commit is contained in:
parent
76fbdeb7d5
commit
0de36ec169
|
@ -239,6 +239,11 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
|
||||||
def FeatureSoftFloat
|
def FeatureSoftFloat
|
||||||
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
|
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
|
||||||
"Use software floating point features.">;
|
"Use software floating point features.">;
|
||||||
|
// On at least some AMD processors, there is no performance hazard to writing
|
||||||
|
// only the lower parts of a YMM register without clearing the upper part.
|
||||||
|
def FeatureFastPartialYMMWrite
|
||||||
|
: SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
|
||||||
|
"true", "Partial writes to YMM registers are fast">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// X86 processors supported.
|
// X86 processors supported.
|
||||||
|
@ -596,7 +601,8 @@ def : ProcessorModel<"btver2", BtVer2Model, [
|
||||||
FeatureXSAVE,
|
FeatureXSAVE,
|
||||||
FeatureXSAVEOPT,
|
FeatureXSAVEOPT,
|
||||||
FeatureSlowSHLD,
|
FeatureSlowSHLD,
|
||||||
FeatureLAHFSAHF
|
FeatureLAHFSAHF,
|
||||||
|
FeatureFastPartialYMMWrite
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
// Bulldozer
|
// Bulldozer
|
||||||
|
|
|
@ -285,6 +285,7 @@ void X86Subtarget::initializeEnvironment() {
|
||||||
HasSSEUnalignedMem = false;
|
HasSSEUnalignedMem = false;
|
||||||
HasCmpxchg16b = false;
|
HasCmpxchg16b = false;
|
||||||
UseLeaForSP = false;
|
UseLeaForSP = false;
|
||||||
|
HasFastPartialYMMWrite = false;
|
||||||
HasSlowDivide32 = false;
|
HasSlowDivide32 = false;
|
||||||
HasSlowDivide64 = false;
|
HasSlowDivide64 = false;
|
||||||
PadShortFunctions = false;
|
PadShortFunctions = false;
|
||||||
|
|
|
@ -189,6 +189,10 @@ protected:
|
||||||
/// the stack pointer. This is an optimization for Intel Atom processors.
|
/// the stack pointer. This is an optimization for Intel Atom processors.
|
||||||
bool UseLeaForSP;
|
bool UseLeaForSP;
|
||||||
|
|
||||||
|
/// True if there is no performance penalty to writing only the lower parts
|
||||||
|
/// of a YMM register without clearing the upper part.
|
||||||
|
bool HasFastPartialYMMWrite;
|
||||||
|
|
||||||
/// True if 8-bit divisions are significantly faster than
|
/// True if 8-bit divisions are significantly faster than
|
||||||
/// 32-bit divisions and should be used when possible.
|
/// 32-bit divisions and should be used when possible.
|
||||||
bool HasSlowDivide32;
|
bool HasSlowDivide32;
|
||||||
|
@ -421,6 +425,7 @@ public:
|
||||||
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
|
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
|
||||||
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
|
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
|
||||||
bool useLeaForSP() const { return UseLeaForSP; }
|
bool useLeaForSP() const { return UseLeaForSP; }
|
||||||
|
/// Accessor for the HasFastPartialYMMWrite subtarget flag, i.e. whether
/// writing only the lower part of a YMM register without clearing the upper
/// part carries no performance penalty.
bool hasFastPartialYMMWrite() const {
  return HasFastPartialYMMWrite;
}
|
||||||
bool hasSlowDivide32() const { return HasSlowDivide32; }
|
bool hasSlowDivide32() const { return HasSlowDivide32; }
|
||||||
bool hasSlowDivide64() const { return HasSlowDivide64; }
|
bool hasSlowDivide64() const { return HasSlowDivide64; }
|
||||||
bool padShortFunctions() const { return PadShortFunctions; }
|
bool padShortFunctions() const { return PadShortFunctions; }
|
||||||
|
|
|
@ -248,7 +248,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
|
||||||
/// vzeroupper instructions before function calls.
|
/// vzeroupper instructions before function calls.
|
||||||
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
|
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
|
||||||
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
|
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
|
||||||
if (!ST.hasAVX() || ST.hasAVX512())
|
if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
|
||||||
return false;
|
return false;
|
||||||
TII = ST.getInstrInfo();
|
TII = ST.getInstrInfo();
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||||
|
|
|
@ -1,4 +1,9 @@
|
||||||
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
|
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
|
||||||
|
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx,+fast-partial-ymm-write | FileCheck --check-prefix=FASTYMM %s
|
||||||
|
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck --check-prefix=BTVER2 %s
|
||||||
|
|
||||||
|
; FASTYMM-NOT: vzeroupper
|
||||||
|
; BTVER2-NOT: vzeroupper
|
||||||
|
|
||||||
declare i32 @foo()
|
declare i32 @foo()
|
||||||
declare <4 x float> @do_sse(<4 x float>)
|
declare <4 x float> @do_sse(<4 x float>)
|
||||||
|
|
Loading…
Reference in New Issue