Fix PR5412: Fix an inverted check and another missing sub-register check.

llvm-svn: 88738
This commit is contained in:
Evan Cheng 2009-11-14 02:09:09 +00:00
parent a627e26d39
commit d2c10508cd
3 changed files with 264 additions and 8 deletions

View File

@ -483,19 +483,20 @@ static void InvalidateKills(MachineInstr &MI,
} }
/// InvalidateRegDef - If the def operand of the specified def MI is now dead /// InvalidateRegDef - If the def operand of the specified def MI is now dead
/// (since it's spill instruction is removed), mark it isDead. Also checks if /// (since its spill instruction is removed), mark it isDead. Also checks if
/// the def MI has other definition operands that are not dead. Returns it by /// the def MI has other definition operands that are not dead. Returns it by
/// reference. /// reference.
static bool InvalidateRegDef(MachineBasicBlock::iterator I, static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr &NewDef, unsigned Reg, MachineInstr &NewDef, unsigned Reg,
bool &HasLiveDef) { bool &HasLiveDef,
const TargetRegisterInfo *TRI) {
// Due to remat, it's possible this reg isn't being reused. That is, // Due to remat, it's possible this reg isn't being reused. That is,
// the def of this reg (by prev MI) is now dead. // the def of this reg (by prev MI) is now dead.
MachineInstr *DefMI = I; MachineInstr *DefMI = I;
MachineOperand *DefOp = NULL; MachineOperand *DefOp = NULL;
for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = DefMI->getOperand(i); MachineOperand &MO = DefMI->getOperand(i);
if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
continue; continue;
if (MO.getReg() == Reg) if (MO.getReg() == Reg)
DefOp = &MO; DefOp = &MO;
@ -512,7 +513,8 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr *NMI = I; MachineInstr *NMI = I;
for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
MachineOperand &MO = NMI->getOperand(j); MachineOperand &MO = NMI->getOperand(j);
if (!MO.isReg() || MO.getReg() != Reg) if (!MO.isReg() || MO.getReg() == 0 ||
(MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
continue; continue;
if (MO.isUse()) if (MO.isUse())
FoundUse = true; FoundUse = true;
@ -556,11 +558,30 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
KillOps[*SR] = NULL; KillOps[*SR] = NULL;
RegKills.reset(*SR); RegKills.reset(*SR);
} }
} else {
// Check for subreg kills as well.
// d4 =
// store d4, fi#0
// ...
// = s8<kill>
// ...
// = d4 <avoiding reload>
for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
unsigned SReg = *SR;
if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
KillOps[SReg]->setIsKill(false);
unsigned KReg = KillOps[SReg]->getReg();
KillOps[KReg] = NULL;
RegKills.reset(KReg);
if (!MI.isRegTiedToDefOperand(i)) for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
// Unless it's a two-address operand, this is the new kill. KillOps[*SSR] = NULL;
MO.setIsKill(); RegKills.reset(*SSR);
} }
}
}
}
if (MO.isKill()) { if (MO.isKill()) {
RegKills.set(Reg); RegKills.set(Reg);
KillOps[Reg] = &MO; KillOps[Reg] = &MO;
@ -1458,7 +1479,7 @@ private:
// being reused. // being reused.
for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
bool HasOtherDef = false; bool HasOtherDef = false;
if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) { if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
MachineInstr *DeadDef = PrevMII; MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) { if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side effects. // FIXME: This assumes a remat def does not have side effects.

View File

@ -0,0 +1,123 @@
; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
; PR5412
; Named struct types referenced by @aaa. The names are placeholders; only the
; layouts matter. @aaa's signature uses %foo, %quuz and %bar directly; %baz and
; %quux are only reachable as fields of %foo.
; %bar: a %quad, two floats, pointer arrays to %quuz/%bar, and four trailing bytes.
%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
; %baz: a %bar pointer paired with an i32.
%baz = type { %bar*, i32 }
; %foo: the pointee type of @aaa's %this parameter.
%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
; %quad: a struct wrapping an array of four floats.
%quad = type { [4 x float] }
; %quux: four %quuz pointers, four floats and an i32.
%quux = type { [4 x %quuz*], [4 x float], i32 }
; %quuz: two %quad values; @aaa indexes into the second one (field 1).
%quuz = type { %quad, %quad }
; @aaa - reduced reproducer for PR5412.
; The body is kept in its reduced form: many operands are undef or constants
; and every path returns null. The llc invocation at the top of this file has
; no output check, so the test only requires that llc completes successfully.
; %this and %forced are never referenced in the body.
define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
entry:
br i1 undef, label %bb85, label %bb
bb: ; preds = %entry
br i1 undef, label %bb3.i, label %bb2.i
bb2.i: ; preds = %bb
br label %bb3.i
; bb3.i holds all of the floating-point work: loads from fields of %a/%b/%c,
; a chain of fsub/fmul/fadd, and a single call to @sqrtf in the middle.
bb3.i: ; preds = %bb2.i, %bb
%0 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
%1 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
%2 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
%3 = load float* %2, align 4 ; <float> [#uses=1]
%4 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
%5 = fsub float %3, undef ; <float> [#uses=2]
%6 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
%7 = load float* %6, align 4 ; <float> [#uses=1]
%8 = fsub float %7, undef ; <float> [#uses=1]
%9 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
%10 = load float* %9, align 4 ; <float> [#uses=1]
%11 = fsub float %10, undef ; <float> [#uses=2]
%12 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
%13 = load float* %12, align 4 ; <float> [#uses=1]
%14 = fsub float %13, undef ; <float> [#uses=1]
%15 = load float* undef, align 4 ; <float> [#uses=1]
%16 = fsub float %15, undef ; <float> [#uses=1]
%17 = fmul float %5, %16 ; <float> [#uses=1]
%18 = fsub float %17, 0.000000e+00 ; <float> [#uses=5]
%19 = fmul float %8, %11 ; <float> [#uses=1]
%20 = fsub float %19, undef ; <float> [#uses=3]
%21 = fmul float %1, %14 ; <float> [#uses=1]
%22 = fmul float %5, %11 ; <float> [#uses=1]
%23 = fsub float %21, %22 ; <float> [#uses=2]
; %18, %20 and %23 are stored here and used again after the @sqrtf call below,
; so they are live across the call.
; NOTE(review): presumably this is what forces the VFP spills/reloads that
; PR5412 is about - confirm against the bug report.
store float %18, float* undef
%24 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2]
store float %20, float* %24
store float %23, float* undef
%25 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0]
%26 = fmul float %18, %18 ; <float> [#uses=1]
%27 = fadd float %26, undef ; <float> [#uses=1]
%28 = fadd float %27, undef ; <float> [#uses=1]
%29 = call arm_aapcs_vfpcc float @sqrtf(float %28) readnone ; <float> [#uses=1]
; After the call, the addresses %2, %6, %9 and %12 computed above are loaded
; from a second time, and %4 is loaded for the first time.
%30 = load float* null, align 4 ; <float> [#uses=2]
%31 = load float* %4, align 4 ; <float> [#uses=2]
%32 = load float* %2, align 4 ; <float> [#uses=2]
%33 = load float* null, align 4 ; <float> [#uses=3]
%34 = load float* %6, align 4 ; <float> [#uses=2]
%35 = fsub float %33, %34 ; <float> [#uses=2]
%36 = fmul float %20, %35 ; <float> [#uses=1]
%37 = fsub float %36, undef ; <float> [#uses=1]
%38 = fmul float %23, 0.000000e+00 ; <float> [#uses=1]
%39 = fmul float %18, %35 ; <float> [#uses=1]
%40 = fsub float %38, %39 ; <float> [#uses=1]
%41 = fmul float %18, 0.000000e+00 ; <float> [#uses=1]
%42 = fmul float %20, 0.000000e+00 ; <float> [#uses=1]
%43 = fsub float %41, %42 ; <float> [#uses=1]
%44 = fmul float 0.000000e+00, %37 ; <float> [#uses=1]
%45 = fmul float %31, %40 ; <float> [#uses=1]
%46 = fadd float %44, %45 ; <float> [#uses=1]
%47 = fmul float %33, %43 ; <float> [#uses=1]
%48 = fadd float %46, %47 ; <float> [#uses=2]
%49 = load float* %9, align 4 ; <float> [#uses=2]
%50 = fsub float %30, %49 ; <float> [#uses=1]
%51 = load float* %12, align 4 ; <float> [#uses=3]
%52 = fsub float %32, %51 ; <float> [#uses=2]
%53 = load float* undef, align 4 ; <float> [#uses=2]
%54 = load float* %24, align 4 ; <float> [#uses=2]
%55 = fmul float %54, undef ; <float> [#uses=1]
%56 = fmul float undef, %52 ; <float> [#uses=1]
%57 = fsub float %55, %56 ; <float> [#uses=1]
%58 = fmul float undef, %52 ; <float> [#uses=1]
%59 = fmul float %54, %50 ; <float> [#uses=1]
%60 = fsub float %58, %59 ; <float> [#uses=1]
%61 = fmul float %30, %57 ; <float> [#uses=1]
%62 = fmul float %32, 0.000000e+00 ; <float> [#uses=1]
%63 = fadd float %61, %62 ; <float> [#uses=1]
%64 = fmul float %34, %60 ; <float> [#uses=1]
%65 = fadd float %63, %64 ; <float> [#uses=2]
%66 = fcmp olt float %48, %65 ; <i1> [#uses=1]
%67 = fsub float %49, 0.000000e+00 ; <float> [#uses=1]
%68 = fsub float %51, %31 ; <float> [#uses=1]
%69 = fsub float %53, %33 ; <float> [#uses=1]
%70 = fmul float undef, %67 ; <float> [#uses=1]
%71 = load float* undef, align 4 ; <float> [#uses=2]
%72 = fmul float %71, %69 ; <float> [#uses=1]
%73 = fsub float %70, %72 ; <float> [#uses=1]
%74 = fmul float %71, %68 ; <float> [#uses=1]
%75 = fsub float %74, 0.000000e+00 ; <float> [#uses=1]
%76 = fmul float %51, %73 ; <float> [#uses=1]
%77 = fadd float undef, %76 ; <float> [#uses=1]
%78 = fmul float %53, %75 ; <float> [#uses=1]
%79 = fadd float %77, %78 ; <float> [#uses=1]
; The three candidate sums are combined with selects, divided by either the
; @sqrtf result (%29) or 1.0, and the result is written through an undef pointer.
%80 = select i1 %66, float %48, float %65 ; <float> [#uses=1]
%81 = select i1 undef, float %80, float %79 ; <float> [#uses=1]
%iftmp.164.0 = select i1 undef, float %29, float 1.000000e+00 ; <float> [#uses=1]
%82 = fdiv float %81, %iftmp.164.0 ; <float> [#uses=1]
%iftmp.165.0 = select i1 undef, float %82, float 0.000000e+00 ; <float> [#uses=1]
store float %iftmp.165.0, float* undef, align 4
br i1 false, label %bb4.i97, label %ccc.exit98
bb4.i97: ; preds = %bb3.i
br label %ccc.exit98
; Both exits return a null %bar*.
ccc.exit98: ; preds = %bb4.i97, %bb3.i
ret %bar* null
bb85: ; preds = %entry
ret %bar* null
}
; External declaration of the @sqrtf callee used by @aaa: float in, float out,
; arm_aapcs_vfpcc calling convention, marked readnone.
; NOTE(review): presumably the C library sqrtf - nothing in this file defines it.
declare arm_aapcs_vfpcc float @sqrtf(float) readnone

View File

@ -0,0 +1,112 @@
; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s
; PR5412
; Named struct types referenced by @aaa. The names are placeholders; only the
; layouts matter. @aaa's signature uses %foo, %quuz and %bar directly; %baz and
; %quux are only reachable as fields of %foo.
; %bar: a %quad, two floats, pointer arrays to %quuz/%bar, and four trailing bytes.
%bar = type { %quad, float, float, [3 x %quuz*], [3 x %bar*], [2 x %bar*], [3 x i8], i8 }
; %baz: a %bar pointer paired with an i32.
%baz = type { %bar*, i32 }
; %foo: the pointee type of @aaa's %this parameter.
%foo = type { i8, %quux, %quad, float, [64 x %quuz], [128 x %bar], i32, %baz, %baz }
; %quad: a struct wrapping an array of four floats.
%quad = type { [4 x float] }
; %quux: four %quuz pointers, four floats and an i32.
%quux = type { [4 x %quuz*], [4 x float], i32 }
; %quuz: two %quad values; @aaa indexes into the second one (field 1).
%quuz = type { %quad, %quad }
; @aaa - reduced reproducer for PR5412.
; The body is kept in its reduced form: many operands are undef or constants
; and every path returns null. The llc invocation at the top of this file has
; no output check, so the test only requires that llc completes successfully.
; %this and %forced are never referenced in the body.
define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
entry:
; %0 is loaded from an undef address; it is used in bb3.i for a GEP and for
; the final comparison against null.
%0 = load %bar** undef, align 4 ; <%bar*> [#uses=2]
br i1 false, label %bb85, label %bb
bb: ; preds = %entry
br i1 undef, label %bb3.i, label %bb2.i
bb2.i: ; preds = %bb
br label %bb3.i
; bb3.i holds all of the floating-point work. The field addresses %1, %3, %4,
; %6 and %7 are computed before the @sqrtf call but only loaded from after it,
; and the float values %11 and %14 are used on both sides of the call.
; NOTE(review): presumably these values live across the call are what trigger
; the spill/reload handling PR5412 is about - confirm against the bug report.
bb3.i: ; preds = %bb2.i, %bb
%1 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
%2 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
%3 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
%4 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1]
%5 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
%6 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
%7 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
%8 = fsub float undef, undef ; <float> [#uses=1]
%9 = fmul float 0.000000e+00, %8 ; <float> [#uses=1]
%10 = fmul float %5, 0.000000e+00 ; <float> [#uses=1]
%11 = fsub float %9, %10 ; <float> [#uses=3]
%12 = fmul float %2, 0.000000e+00 ; <float> [#uses=1]
%13 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
%14 = fsub float %12, %13 ; <float> [#uses=2]
store float %14, float* undef
%15 = getelementptr inbounds %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1]
store float 0.000000e+00, float* %15
%16 = fmul float %11, %11 ; <float> [#uses=1]
%17 = fadd float %16, 0.000000e+00 ; <float> [#uses=1]
%18 = fadd float %17, undef ; <float> [#uses=1]
%19 = call arm_aapcs_vfpcc float @sqrtf(float %18) readnone ; <float> [#uses=2]
; The @sqrtf result is compared against a hex-encoded float constant
; (roughly 1.0e-4 - TODO confirm); %20 later selects between %19 and 1.0.
%20 = fcmp ogt float %19, 0x3F1A36E2E0000000 ; <i1> [#uses=1]
%21 = load float* %1, align 4 ; <float> [#uses=2]
%22 = load float* %3, align 4 ; <float> [#uses=2]
%23 = load float* undef, align 4 ; <float> [#uses=2]
%24 = load float* %4, align 4 ; <float> [#uses=2]
%25 = fsub float %23, %24 ; <float> [#uses=2]
%26 = fmul float 0.000000e+00, %25 ; <float> [#uses=1]
%27 = fsub float %26, undef ; <float> [#uses=1]
%28 = fmul float %14, 0.000000e+00 ; <float> [#uses=1]
%29 = fmul float %11, %25 ; <float> [#uses=1]
%30 = fsub float %28, %29 ; <float> [#uses=1]
%31 = fsub float undef, 0.000000e+00 ; <float> [#uses=1]
%32 = fmul float %21, %27 ; <float> [#uses=1]
%33 = fmul float undef, %30 ; <float> [#uses=1]
%34 = fadd float %32, %33 ; <float> [#uses=1]
%35 = fmul float %23, %31 ; <float> [#uses=1]
%36 = fadd float %34, %35 ; <float> [#uses=1]
%37 = load float* %6, align 4 ; <float> [#uses=2]
%38 = load float* %7, align 4 ; <float> [#uses=2]
%39 = fsub float %22, %38 ; <float> [#uses=2]
%40 = load float* undef, align 4 ; <float> [#uses=1]
%41 = load float* null, align 4 ; <float> [#uses=2]
%42 = fmul float %41, undef ; <float> [#uses=1]
%43 = fmul float undef, %39 ; <float> [#uses=1]
%44 = fsub float %42, %43 ; <float> [#uses=1]
%45 = fmul float undef, %39 ; <float> [#uses=1]
%46 = fmul float %41, 0.000000e+00 ; <float> [#uses=1]
%47 = fsub float %45, %46 ; <float> [#uses=1]
%48 = fmul float 0.000000e+00, %44 ; <float> [#uses=1]
%49 = fmul float %22, undef ; <float> [#uses=1]
%50 = fadd float %48, %49 ; <float> [#uses=1]
%51 = fmul float %24, %47 ; <float> [#uses=1]
%52 = fadd float %50, %51 ; <float> [#uses=1]
%53 = fsub float %37, %21 ; <float> [#uses=2]
%54 = fmul float undef, undef ; <float> [#uses=1]
%55 = fmul float undef, undef ; <float> [#uses=1]
%56 = fsub float %54, %55 ; <float> [#uses=1]
%57 = fmul float undef, %53 ; <float> [#uses=1]
%58 = load float* undef, align 4 ; <float> [#uses=2]
%59 = fmul float %58, undef ; <float> [#uses=1]
%60 = fsub float %57, %59 ; <float> [#uses=1]
%61 = fmul float %58, undef ; <float> [#uses=1]
%62 = fmul float undef, %53 ; <float> [#uses=1]
%63 = fsub float %61, %62 ; <float> [#uses=1]
%64 = fmul float %37, %56 ; <float> [#uses=1]
%65 = fmul float %38, %60 ; <float> [#uses=1]
%66 = fadd float %64, %65 ; <float> [#uses=1]
%67 = fmul float %40, %63 ; <float> [#uses=1]
%68 = fadd float %66, %67 ; <float> [#uses=1]
; The three sums (%36, %52, %68) are combined with selects, divided by either
; the @sqrtf result or 1.0, and the quotient is stored through a null pointer.
%69 = select i1 undef, float %36, float %52 ; <float> [#uses=1]
%70 = select i1 undef, float %69, float %68 ; <float> [#uses=1]
%iftmp.164.0 = select i1 %20, float %19, float 1.000000e+00 ; <float> [#uses=1]
%71 = fdiv float %70, %iftmp.164.0 ; <float> [#uses=1]
store float %71, float* null, align 4
%72 = icmp eq %bar* null, %0 ; <i1> [#uses=1]
br i1 %72, label %bb4.i97, label %ccc.exit98
bb4.i97: ; preds = %bb3.i
%73 = load %bar** undef, align 4 ; <%bar*> [#uses=0]
br label %ccc.exit98
; Both exits return a null %bar*.
ccc.exit98: ; preds = %bb4.i97, %bb3.i
ret %bar* null
bb85: ; preds = %entry
ret %bar* null
}
; External declaration of the @sqrtf callee used by @aaa: float in, float out,
; arm_aapcs_vfpcc calling convention, marked readnone.
; NOTE(review): presumably the C library sqrtf - nothing in this file defines it.
declare arm_aapcs_vfpcc float @sqrtf(float) readnone