From 0735ecfe17387acc0e5dc2914b01de08bb031ac5 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Fri, 21 May 2010 18:17:54 +0000
Subject: [PATCH] now that fp reg kill insertion stuff happens as a separate
 pass after isel instead of being interlaced with it, we can trust that all
 the code for a function has been isel'd before it is run.

The practical impact of this is that we can scan for machine instr phis
instead of doing a fuzzy match on the LLVM BB for phi nodes. Doing the fuzzy
match required knowing when isel would produce an fp reg stack phi which was
gross. It was also wrong in cases where select got lowered to a branch tree
because cmovs aren't available (PR6828).

Just do the scan on machine phis which is simpler, faster and more correct.
This fixes PR6828.

llvm-svn: 104333
---
 .../Target/X86/X86FloatingPointRegKill.cpp | 71 +++++++++----------
 llvm/test/CodeGen/X86/fp-stack.ll          | 25 +++++++
 2 files changed, 58 insertions(+), 38 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/fp-stack.ll

diff --git a/llvm/lib/Target/X86/X86FloatingPointRegKill.cpp b/llvm/lib/Target/X86/X86FloatingPointRegKill.cpp
index d9c69f5f9123..747683dcc412 100644
--- a/llvm/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -14,7 +14,6 @@
 #define DEBUG_TYPE "x86-codegen"
 #include "X86.h"
 #include "X86InstrInfo.h"
-#include "X86Subtarget.h"
 #include "llvm/Instructions.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -53,10 +52,26 @@ FunctionPass *llvm::createX87FPRegKillInserterPass() {
   return new FPRegKiller();
 }
 
+/// isFPStackVReg - Return true if the specified vreg is from a fp stack
+/// register class.
+static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
+  if (!TargetRegisterInfo::isVirtualRegister(RegNo))
+    return false;
+
+  switch (MRI.getRegClass(RegNo)->getID()) {
+  default: return false;
+  case X86::RFP32RegClassID:
+  case X86::RFP64RegClassID:
+  case X86::RFP80RegClassID:
+    return true;
+  }
+}
+
+
 /// ContainsFPStackCode - Return true if the specific MBB has floating point
 /// stack code, and thus needs an FP_REG_KILL.
-static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
-                                MachineRegisterInfo &MRI) {
+static bool ContainsFPStackCode(MachineBasicBlock *MBB,
+                                const MachineRegisterInfo &MRI) {
   // Scan the block, looking for instructions that define fp stack vregs.
   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
        ++I) {
@@ -64,40 +79,27 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB, unsigned SSELevel,
       continue;
 
     for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
-      if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef() ||
-        !TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()))
+      if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
         continue;
 
-      const TargetRegisterClass *RegClass =
-        MRI.getRegClass(I->getOperand(op).getReg());
-
-      switch (RegClass->getID()) {
-      default: break;
-      case X86::RFP32RegClassID:
-      case X86::RFP64RegClassID:
-      case X86::RFP80RegClassID:
+      if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
         return true;
-      }
     }
   }
 
   // Check PHI nodes in successor blocks. These PHI's will be lowered to have
-  // a copy of the input value in this block. In SSE mode, we only care about
-  // 80-bit values.
-
-  // Final check, check LLVM BB's that are successors to the LLVM BB
-  // corresponding to BB for FP PHI nodes.
-  const BasicBlock *LLVMBB = MBB->getBasicBlock();
-  for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
-       SI != E; ++SI) {
-    const PHINode *PN;
-    for (BasicBlock::const_iterator II = SI->begin();
-         (PN = dyn_cast<PHINode>(II)); ++II) {
-      if (PN->getType()->isX86_FP80Ty() ||
-          (SSELevel == 0 && PN->getType()->isFloatingPointTy()) ||
-          (SSELevel < 2 && PN->getType()->isDoubleTy())) {
+  // a copy of the input value in this block, which is a definition of the
+  // value.
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+       E = MBB->succ_end(); SI != E; ++ SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
+         I != E; ++I) {
+      // All PHI nodes are at the top of the block.
+      if (!I->isPHI()) break;
+
+      if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
         return true;
-      }
     }
   }
 
@@ -120,19 +122,12 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
 
   // Fast-path: If nothing is using the x87 registers, we don't need to do
   // any scanning.
-  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
   if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
       MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
       MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
     return false;
 
-  const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
-  unsigned SSELevel = 0;
-  if (Subtarget.hasSSE2())
-    SSELevel = 2;
-  else if (Subtarget.hasSSE1())
-    SSELevel = 1;
-
   bool Changed = false;
   MachineFunction::iterator MBBI = MF.begin();
   MachineFunction::iterator EndMBB = MF.end();
@@ -149,7 +144,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
     }
 
     // If we find any FP stack code, emit the FP_REG_KILL instruction.
-    if (ContainsFPStackCode(MBB, SSELevel, MRI)) {
+    if (ContainsFPStackCode(MBB, MRI)) {
       BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
               MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
       ++NumFPKill;
diff --git a/llvm/test/CodeGen/X86/fp-stack.ll b/llvm/test/CodeGen/X86/fp-stack.ll
new file mode 100644
index 000000000000..dca644de667e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-stack.ll
@@ -0,0 +1,25 @@
+; RUN: llc %s -o - -mcpu=pentium
+; PR6828
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+  %tmp6 = load x86_fp80* undef ; [#uses=2]
+  %tmp15 = load x86_fp80* undef ; [#uses=2]
+  %tmp24 = load x86_fp80* undef ; [#uses=1]
+  br i1 undef, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+  %cmp139 = fcmp ogt x86_fp80 %tmp15, %tmp6 ; [#uses=1]
+  %maxdiag.0 = select i1 %cmp139, x86_fp80 %tmp15, x86_fp80 %tmp6 ; [#uses=1]
+  %cmp139.1 = fcmp ogt x86_fp80 %tmp24, %maxdiag.0 ; [#uses=1]
+  br i1 %cmp139.1, label %sw.bb372, label %return
+
+sw.bb372: ; preds = %for.end
+  ret void
+
+return: ; preds = %for.end
+  ret void
+}
+