Now that fp reg kill insertion happens as a separate pass
after isel instead of being interlaced with it, we can
trust that all the code for a function has been isel'd before
it is run.

The practical impact of this is that we can scan for machine
instr phis instead of doing a fuzzy match on the LLVM BB for
phi nodes.  Doing the fuzzy match required knowing when isel
would produce an fp reg stack phi, which was gross.  It was
also wrong in cases where select got lowered to a branch
tree, because cmovs aren't available (PR6828).

Just do the scan on machine phis which is simpler, faster
and more correct.  This fixes PR6828.

llvm-svn: 104333
This commit is contained in:
Chris Lattner 2010-05-21 18:17:54 +00:00
parent 1a23f26832
commit 0735ecfe17
2 changed files with 58 additions and 38 deletions

View File

@ -14,7 +14,6 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@ -53,10 +52,26 @@ FunctionPass *llvm::createX87FPRegKillInserterPass() {
return new FPRegKiller();
}
/// isFPStackVReg - Tell whether \p RegNo names a virtual register that lives
/// in one of the fp stack register classes (RFP32, RFP64 or RFP80).
///
/// \param RegNo  register number to classify.
/// \param MRI    register info used to look up the register class of RegNo.
/// \returns true only for virtual registers in an fp stack register class;
///          false for everything else, including non-virtual registers.
static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
  // Only virtual registers are looked up in MRI; anything else is rejected
  // before the register class query.
  if (!TargetRegisterInfo::isVirtualRegister(RegNo))
    return false;

  const unsigned ClassID = MRI.getRegClass(RegNo)->getID();
  return ClassID == X86::RFP32RegClassID ||
         ClassID == X86::RFP64RegClassID ||
         ClassID == X86::RFP80RegClassID;
}
/// ContainsFPStackCode - Return true if the specific MBB has floating point
/// stack code, and thus needs an FP_REG_KILL.
static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
MachineRegisterInfo &MRI) {
static bool ContainsFPStackCode(MachineBasicBlock *MBB,
const MachineRegisterInfo &MRI) {
// Scan the block, looking for instructions that define fp stack vregs.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
@ -64,42 +79,29 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
continue;
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef() ||
!TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()))
if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
continue;
const TargetRegisterClass *RegClass =
MRI.getRegClass(I->getOperand(op).getReg());
switch (RegClass->getID()) {
default: break;
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
return true;
}
}
}
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
// a copy of the input value in this block. In SSE mode, we only care about
// 80-bit values.
// a copy of the input value in this block, which is a definition of the
// value.
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
E = MBB->succ_end(); SI != E; ++ SI) {
MachineBasicBlock *SuccBB = *SI;
for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
I != E; ++I) {
// All PHI nodes are at the top of the block.
if (!I->isPHI()) break;
// Final check, check LLVM BB's that are successors to the LLVM BB
// corresponding to BB for FP PHI nodes.
const BasicBlock *LLVMBB = MBB->getBasicBlock();
for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
SI != E; ++SI) {
const PHINode *PN;
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
if (PN->getType()->isX86_FP80Ty() ||
(SSELevel == 0 && PN->getType()->isFloatingPointTy()) ||
(SSELevel < 2 && PN->getType()->isDoubleTy())) {
if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
return true;
}
}
}
return false;
}
@ -120,19 +122,12 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// Fast-path: If nothing is using the x87 registers, we don't need to do
// any scanning.
MachineRegisterInfo &MRI = MF.getRegInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
return false;
const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
unsigned SSELevel = 0;
if (Subtarget.hasSSE2())
SSELevel = 2;
else if (Subtarget.hasSSE1())
SSELevel = 1;
bool Changed = false;
MachineFunction::iterator MBBI = MF.begin();
MachineFunction::iterator EndMBB = MF.end();
@ -149,7 +144,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
}
// If we find any FP stack code, emit the FP_REG_KILL instruction.
if (ContainsFPStackCode(MBB, SSELevel, MRI)) {
if (ContainsFPStackCode(MBB, MRI)) {
BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;

View File

@ -0,0 +1,25 @@
; RUN: llc %s -o - -mcpu=pentium
; PR6828
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
; @foo - Reduced test case for PR6828. %bb.nph contains an x86_fp80 select
; whose result feeds a second fcmp that controls the final branch.
; NOTE(review): the RUN line uses -mcpu=pentium, presumably so that the select
; is lowered to branches rather than a cmov - confirm against PR6828.
define void @foo() nounwind {
entry:
%tmp6 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
%tmp15 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
%tmp24 = load x86_fp80* undef ; <x86_fp80> [#uses=1]
br i1 undef, label %return, label %bb.nph
bb.nph: ; preds = %entry
%cmp139 = fcmp ogt x86_fp80 %tmp15, %tmp6 ; <i1> [#uses=1]
%maxdiag.0 = select i1 %cmp139, x86_fp80 %tmp15, x86_fp80 %tmp6 ; <x86_fp80> [#uses=1]
%cmp139.1 = fcmp ogt x86_fp80 %tmp24, %maxdiag.0 ; <i1> [#uses=1]
br i1 %cmp139.1, label %sw.bb372, label %return
sw.bb372: ; preds = %bb.nph
ret void
return: ; preds = %bb.nph, %entry
ret void
}