From 51cbd2d6c4e55c6225568271921253aaf9d9a6a3 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 10 Aug 2009 02:37:24 +0000 Subject: [PATCH] Add support to reduce most of 32-bit Thumb2 arithmetic instructions. llvm-svn: 78550 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 5 +- llvm/lib/Target/ARM/ARMInstrThumb.td | 7 +- llvm/lib/Target/ARM/ARMInstrThumb2.td | 14 +- llvm/lib/Target/ARM/Thumb2SizeReduction.cpp | 212 +++++++++++++------- 4 files changed, 159 insertions(+), 79 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 137e754461a0..bc474f264b71 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -267,8 +267,9 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { } static inline -const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB) { - return MIB.addReg(ARM::CPSR, getDefRegState(true)); +const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB, + bool isDead = false) { + return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead)); } static inline diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 8bf1e2b72422..25dbddf0e235 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -423,7 +423,7 @@ let Defs = [CPSR] in { def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALU, "cmp", " $lhs, $rhs", [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; -def tCMPZi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALU, +def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALU, "cmp", " $lhs, $rhs", [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>; @@ -434,14 +434,13 @@ let Defs = [CPSR] in { def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, "cmp", " $lhs, $rhs", [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; -def tCMPZr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, +def tCMPzr : T1pI<(outs), (ins 
tGPR:$lhs, tGPR:$rhs), IIC_iALU, "cmp", " $lhs, $rhs", [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>; -// TODO: Make use of the followings cmp hi regs def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iALU, "cmp", " $lhs, $rhs", []>; -def tCMPZhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iALU, +def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iALU, "cmp", " $lhs, $rhs", []>; } diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c50482cdbb04..5499763667dd 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -307,11 +307,13 @@ multiclass T2I_adde_sube_irs { let Defs = [CPSR] in { multiclass T2I_rbin_s_is { // shifted imm - def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), IIC_iALU, + def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), + IIC_iALU, !strconcat(opc, "${s}.w $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>; // shifted register - def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), IIC_iALU, + def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), + IIC_iALU, !strconcat(opc, "${s} $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>; } @@ -997,13 +999,13 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), // Comparison Instructions... 
// -defm t2CMP : T2I_cmp_is<"cmp", - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; +defm t2CMP : T2I_cmp_is<"cmp", + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; defm t2CMPz : T2I_cmp_is<"cmp", BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; -defm t2CMN : T2I_cmp_is<"cmn", - BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; +defm t2CMN : T2I_cmp_is<"cmn", + BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; defm t2CMNz : T2I_cmp_is<"cmn", BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index e1bad50d396a..6798483e8572 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -42,15 +42,44 @@ namespace { }; static const ReduceEntry ReduceTable[] = { - // Wide, Narrow1, Narrow2, mm1, imm2, lo1, lo2, P/C, S + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S { ARM::t2ADCrr, ARM::tADC, 0, 0, 0, 1, 0, 0,0, 0 }, + // FIXME: t2ADDS variants. { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, { ARM::t2ANDrr, ARM::tAND, 0, 0, 0, 1, 0, 0,0, 0 }, - { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 1, 0,0, 0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 }, { ARM::t2ASRrr, ARM::tASRrr, 0, 0, 0, 1, 0, 0,0, 0 }, { ARM::t2BICrr, ARM::tBIC, 0, 0, 0, 1, 0, 0,0, 0 }, - { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 1,0, 0 } + { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 1,0, 0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 1,0, 0 }, + { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 1,0, 0 }, + { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 1,0, 0 }, + { ARM::t2EORrr, ARM::tEOR, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, + { ARM::t2LSLrr, ARM::tLSLrr, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, + { ARM::t2LSRrr, ARM::tLSRrr, 0, 0, 0, 1, 0, 
0,0, 0 }, + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, + // FIXME: Do we need the 16-bit 'S' variant? + { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, + { ARM::t2MUL, ARM::tMUL, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2ORRrr, ARM::tORR, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2RORrr, ARM::tROR, 0, 0, 0, 1, 0, 0,0, 0 }, + // FIXME: T2RSBri immediate must be zero. Also need entry for T2RSBS + //{ ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 }, + { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 } }; class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass { @@ -72,14 +101,15 @@ namespace { /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address /// instruction. - bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr &MI, - const ReduceEntry &Entry); + bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR); /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit /// non-two-address instruction. - bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr &MI, - const ReduceEntry &Entry); - + bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR); /// ReduceMBB - Reduce width of instructions in the specified basic block. 
bool ReduceMBB(MachineBasicBlock &MBB); @@ -95,74 +125,87 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { } } -static bool VerifyPredAndCC(MachineInstr &MI, const ReduceEntry &Entry, - bool is2Addr, bool &HasCC) { - const TargetInstrDesc &TID = MI.getDesc(); - - // Most thumb1 instructions either can be predicated or set CPSR. - HasCC = false; - if (TID.hasOptionalDef()) { - unsigned NumOps = TID.getNumOperands(); - HasCC = (MI.getOperand(NumOps-1).getReg() == ARM::CPSR); - } - +static bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, + bool is2Addr, bool LiveCPSR, + bool &HasCC, bool &CCDead) { unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); if ((is2Addr && Entry.PredCC2 == 0) || (!is2Addr && Entry.PredCC1 == 0)) { if (Pred == ARMCC::AL) { // Not predicated, must set CPSR. - if (!HasCC) return false; + if (!HasCC) { + // Original instruction was not setting CPSR, but CPSR is not + // currently live anyway. It's ok to set it. The CPSR def is + // dead though. + if (!LiveCPSR) { + HasCC = true; + CCDead = true; + return true; + } + return false; + } } else { // Predicated, must not set CPSR. - if (HasCC) return false; + if (HasCC) + return false; } } else { - if (HasCC) return false; + // 16-bit instruction does not set CPSR. 
+ if (HasCC) + return false; } return true; } bool -Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr &MI, - const ReduceEntry &Entry) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned Reg0 = MI.getOperand(0).getReg(); - unsigned Reg1 = MI.getOperand(1).getReg(); +Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR) { + const TargetInstrDesc &TID = MI->getDesc(); + unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); if (Reg0 != Reg1) return false; if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) return false; if (Entry.Imm2Limit) { - unsigned Imm = MI.getOperand(2).getImm(); + unsigned Imm = MI->getOperand(2).getImm(); unsigned Limit = (1 << Entry.Imm2Limit) - 1; if (Imm > Limit) return false; } else { - unsigned Reg2 = MI.getOperand(2).getReg(); + unsigned Reg2 = MI->getOperand(2).getReg(); if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) return false; } bool HasCC = false; - if (!VerifyPredAndCC(MI, Entry, true, HasCC)) + bool CCDead = false; + if (TID.hasOptionalDef()) { + unsigned NumOps = TID.getNumOperands(); + HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); + if (HasCC && MI->getOperand(NumOps-1).isDead()) + CCDead = true; + } + if (!VerifyPredAndCC(MI, Entry, true, LiveCPSR, HasCC, CCDead)) return false; // Add the 16-bit instruction. - DebugLoc dl = MI.getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc2)); - MIB.addOperand(MI.getOperand(0)); + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc2)); + MIB.addOperand(MI->getOperand(0)); if (HasCC) - AddDefaultT1CC(MIB); + AddDefaultT1CC(MIB, CCDead); // Transfer the rest of operands. 
unsigned NumOps = TID.getNumOperands(); - for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) if (!(i < NumOps && TID.OpInfo[i].isOptionalDef())) - MIB.addOperand(MI.getOperand(i)); + MIB.addOperand(MI->getOperand(i)); - DOUT << "Converted 32-bit: " << MI << " to 16-bit: " << *MIB; + DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB; MBB.erase(MI); ++Num2Addrs; @@ -171,17 +214,18 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr &MI, } bool -Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr &MI, - const ReduceEntry &Entry) { +Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR) { unsigned Limit = ~0U; if (Entry.Imm1Limit) Limit = (1 << Entry.Imm1Limit) - 1; - const TargetInstrDesc &TID = MI.getDesc(); + const TargetInstrDesc &TID = MI->getDesc(); for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { if (TID.OpInfo[i].isPredicate()) continue; - const MachineOperand &MO = MI.getOperand(i); + const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) @@ -195,24 +239,31 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr &MI, } bool HasCC = false; - if (!VerifyPredAndCC(MI, Entry, false, HasCC)) + bool CCDead = false; + if (TID.hasOptionalDef()) { + unsigned NumOps = TID.getNumOperands(); + HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); + if (HasCC && MI->getOperand(NumOps-1).isDead()) + CCDead = true; + } + if (!VerifyPredAndCC(MI, Entry, false, LiveCPSR, HasCC, CCDead)) return false; // Add the 16-bit instruction. 
- DebugLoc dl = MI.getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1)); - MIB.addOperand(MI.getOperand(0)); + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1)); + MIB.addOperand(MI->getOperand(0)); if (HasCC) - AddDefaultT1CC(MIB); + AddDefaultT1CC(MIB, CCDead); // Transfer the rest of operands. unsigned NumOps = TID.getNumOperands(); - for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) if (!(i < NumOps && TID.OpInfo[i].isOptionalDef())) - MIB.addOperand(MI.getOperand(i)); + MIB.addOperand(MI->getOperand(i)); - DOUT << "Converted 32-bit: " << MI << " to 16-bit: " << *MIB; + DOUT << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB; MBB.erase(MI); ++Num2Addrs; @@ -220,38 +271,65 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr &MI, return true; } +static bool UpdateCPSRLiveness(MachineInstr &MI, bool LiveCPSR) { + bool HasDef = false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isUndef()) + continue; + if (MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + if (!MO.isDead()) + HasDef = true; + continue; + } + + assert(LiveCPSR && "CPSR liveness tracking is wrong!"); + if (MO.isKill()) { + LiveCPSR = false; + break; + } + } + + return HasDef || LiveCPSR; +} + bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; // FIXME: Track whether CPSR is live. If not, then it's possible to convert // one that doesn't set CPSR to one that does. 
+ bool LiveCPSR = false; MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII = next(MII); for (; MII != E; MII = NextMII) { NextMII = next(MII); - MachineInstr &MI = *MII; - unsigned Opcode = MI.getOpcode(); + MachineInstr *MI = &*MII; + unsigned Opcode = MI->getOpcode(); DenseMap::iterator OPI = ReduceOpcodeMap.find(Opcode); - if (OPI == ReduceOpcodeMap.end()) - continue; + if (OPI != ReduceOpcodeMap.end()) { + const ReduceEntry &Entry = ReduceTable[OPI->second]; + // Ignore "special" cases for now. + if (Entry.Special) + goto ProcessNext; - const ReduceEntry &Entry = ReduceTable[OPI->second]; - // Ignore "special" cases for now. - if (Entry.Special) - continue; + // Try to transform to a 16-bit two-address instruction. + if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) { + Modified = true; + MachineBasicBlock::iterator I = prior(NextMII); + MI = &*I; + goto ProcessNext; + } - // Try to transform to a 16-bit two-address instruction. - if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry)) { - Modified = true; - continue; + // Try to transform to a 16-bit non-two-address instruction. + if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) + Modified = true; } - // Try to transform ro a 16-bit non-two-address instruction. - if (ReduceToNarrow(MBB, MI, Entry)) { - Modified = true; - continue; - } + ProcessNext: // MI may have been erased; prior(NextMII) is what replaced it. + LiveCPSR = UpdateCPSRLiveness(*prior(NextMII), LiveCPSR); } return Modified;