diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 140c80dee4da..19912cc6bc81 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -25,7 +25,6 @@ set(sources X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp - X86PadShortFunction.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp X86Subtarget.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 88dbb6d67aae..1e7b98d94f46 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -63,11 +63,6 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, /// FunctionPass *createEmitX86CodeToMemory(); -/// createX86PadShortFunctions - Return a pass that pads short functions -/// with NOOPs. This will prevent a stall when returning from the function -/// on the Atom. -FunctionPass *createX86PadShortFunctions(); - } // End llvm namespace #endif diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 3ab28993655c..b9d8cf76453b 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -123,11 +123,8 @@ def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", - "HasSlowDivide", "true", - "Use small divide for positive values less than 256">; -def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", - "PadShortFunctions", "true", - "Pad short functions">; + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -170,7 +167,7 @@ def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, FeaturePadShortFunctions]>; + FeatureSlowDivide]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp deleted file mode 100644 index 05f8a62a75e7..000000000000 --- a/llvm/lib/Target/X86/X86PadShortFunction.cpp +++ /dev/null @@ -1,184 +0,0 @@ -//===-------- X86PadShortFunction.cpp - pad short functions -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the pass which will pad short functions to prevent -// a stall if a function returns before the return address is ready. This -// is needed for some Intel Atom processors. -// -//===----------------------------------------------------------------------===// - -#include -#include - -#define DEBUG_TYPE "x86-pad-short-functions" -#include "X86.h" -#include "X86InstrInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -using namespace llvm; - -STATISTIC(NumBBsPadded, "Number of basic blocks padded"); - -namespace { - struct PadShortFunc : public MachineFunctionPass { - static char ID; - PadShortFunc() : MachineFunctionPass(ID) - , Threshold(4) - {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const - { - return "X86 Atom pad short functions"; - } - - private: - bool addPadding(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned int NOOPsToAdd); - - void findReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int Cycles); - - bool cyclesUntilReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location = 0); - - const unsigned int Threshold; - std::map ReturnBBs; - }; - - char PadShortFunc::ID = 0; -} - -FunctionPass *llvm::createX86PadShortFunctions() { - return new PadShortFunc(); -} - -/// runOnMachineFunction - Loop over all of the basic blocks, inserting -/// NOOP instructions before early exits. -bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - // Process all basic blocks. - ReturnBBs.clear(); - - // Search through basic blocks and mark the ones that have early returns - findReturn(MF, *MF.begin(), 0); - - int BBNum; - MachineBasicBlock::iterator ReturnLoc; - MachineBasicBlock *MBB; - - unsigned int Cycles = 0; - unsigned int BBCycles; - - // Pad the identified basic blocks with NOOPs - for (std::map::iterator I = ReturnBBs.begin(); - I != ReturnBBs.end(); ++I) { - BBNum = I->first; - Cycles = I->second; - - if (Cycles < Threshold) { - MBB = MF.getBlockNumbered(BBNum); - if (!cyclesUntilReturn(MF, *MBB, BBCycles, &ReturnLoc)) - continue; - - addPadding(MF, *MBB, ReturnLoc, Threshold - Cycles); - NumBBsPadded++; - } - } - - return false; -} - -/// findReturn - Starting at MBB, follow control flow and add all -/// basic blocks that contain a return to ReturnBBs. -void PadShortFunc::findReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int Cycles) -{ - // If this BB has a return, note how many cycles it takes to get there. - bool hasReturn = cyclesUntilReturn(MF, MBB, Cycles); - if (Cycles >= Threshold) - return; - - if (hasReturn) { - int BBNum = MBB.getNumber(); - ReturnBBs[BBNum] = std::max(ReturnBBs[BBNum], Cycles); - - return; - } - - // Follow branches in BB and look for returns - for (MachineBasicBlock::succ_iterator I = MBB.succ_begin(); - I != MBB.succ_end(); ++I) { - findReturn(MF, **I, Cycles); - } -} - -/// cyclesUntilReturn - if the MBB has a return instruction, set Location to -/// to the instruction and return true. Return false otherwise. -/// Cycles will be incremented by the number of cycles taken to reach the -/// return or the end of the BB, whichever occurs first. -bool PadShortFunc::cyclesUntilReturn(MachineFunction &MF, - MachineBasicBlock &MBB, - unsigned int &Cycles, - MachineBasicBlock::iterator *Location) -{ - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - const TargetMachine &Target = MF.getTarget(); - - for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end(); - ++MBBI) { - MachineInstr *MI = MBBI; - // Mark basic blocks with a return instruction. Calls to other functions - // do not count because the called function will be padded, if necessary - if (MI->isReturn() && !MI->isCall()) { - if (Location) - *Location = MBBI; - return true; - } - - Cycles += TII.getInstrLatency(Target.getInstrItineraryData(), MI); - } - - return false; -} - -/// addPadding - Add the given number of NOOP instructions to the function -/// right before the return at MBBI -bool PadShortFunc::addPadding(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned int NOOPsToAdd) -{ - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - - DebugLoc DL = MBBI->getDebugLoc(); - - while (NOOPsToAdd-- > 0) { - // Since Atom has two instruction execution ports, - // the code emits two noops, which will be executed in parallell - // during one cycle. - BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP)); - BuildMI(MBB, MBBI, DL, TII.get(X86::NOOP)); - } - - return true; -} - diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 53c28f4fce2c..d493b787524c 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -350,7 +350,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , UseLeaForSP(false) , HasSlowDivide(false) , PostRAScheduler(false) - , PadShortFunctions(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 080f4cfeca75..44f38a1a916d 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -146,10 +146,6 @@ protected: /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; - /// PadShortFunctions - True if the short functions should be padded to prevent - /// a stall when returning too early. - bool PadShortFunctions; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -235,7 +231,6 @@ public: bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } bool hasSlowDivide() const { return HasSlowDivide; } - bool padShortFunctions() const { return PadShortFunctions; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 8393f7e91eda..ea99796f3512 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -190,10 +190,6 @@ bool X86PassConfig::addPreEmitPass() { addPass(createX86IssueVZeroUpperPass()); ShouldPrint = true; } - if (getX86Subtarget().padShortFunctions()){ - addPass(createX86PadShortFunctions()); - ShouldPrint = true; - } return ShouldPrint; } diff --git a/llvm/test/CodeGen/X86/atom-pad-short-functions.ll b/llvm/test/CodeGen/X86/atom-pad-short-functions.ll deleted file mode 100644 index 3e5993459705..000000000000 --- a/llvm/test/CodeGen/X86/atom-pad-short-functions.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s - -declare void @external_function(...) - -define i32 @test_return_val(i32 %a) nounwind { -; CHECK: test_return_val -; CHECK: movl -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret - ret i32 %a -} - -define i32 @test_add(i32 %a, i32 %b) nounwind { -; CHECK: test_add -; CHECK: addl -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret - %result = add i32 %a, %b - ret i32 %result -} - -define i32 @test_multiple_ret(i32 %a, i32 %b, i1 %c) nounwind { -; CHECK: @test_multiple_ret -; CHECK: je - -; CHECK: nop -; CHECK: nop -; CHECK: ret - -; CHECK: nop -; CHECK: nop -; CHECK: ret - - br i1 %c, label %bb1, label %bb2 - -bb1: - ret i32 %a - -bb2: - ret i32 %b -} - -define void @test_call_others(i32 %x) nounwind -{ -; CHECK: test_call_others -; CHECK: je - %tobool = icmp eq i32 %x, 0 - br i1 %tobool, label %if.end, label %true.case - -; CHECK: jmp external_function -true.case: - tail call void bitcast (void (...)* @external_function to void ()*)() nounwind - br label %if.end - -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret -if.end: - ret void - -} diff --git a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll index ad1520ef8194..cdfaf7f4c134 100644 --- a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s -; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mattr=-avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s +; RUN: llc < %s -mattr=+avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/CodeGen/X86/ret-mmx.ll b/llvm/test/CodeGen/X86/ret-mmx.ll index 778e4722cd95..865e147a4a24 100644 --- a/llvm/test/CodeGen/X86/ret-mmx.ll +++ b/llvm/test/CodeGen/X86/ret-mmx.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mcpu=core2 -mattr=+mmx,+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mattr=+mmx,+sse2 | FileCheck %s ; rdar://6602459 @g_v1di = external global <1 x i64> diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index 09ca07b31a10..3bec3acdbf76 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -282,7 +282,7 @@ define i32 @test13(i32 %a, i32 %b) nounwind { ; ATOM: test13: ; ATOM: cmpl ; ATOM-NEXT: sbbl -; ATOM: ret +; ATOM-NEXT: ret } define i32 @test14(i32 %a, i32 %b) nounwind { @@ -299,7 +299,7 @@ define i32 @test14(i32 %a, i32 %b) nounwind { ; ATOM: cmpl ; ATOM-NEXT: sbbl ; ATOM-NEXT: notl -; ATOM: ret +; ATOM-NEXT: ret } ; rdar://10961709