[Hexagon] Simplify A4_rcmp[n]eqi R, 0

Consider cases when register R is known to be zero/non-zero, or when it
is defined by a C2_muxii instruction.

llvm-svn: 338251
This commit is contained in:
Krzysztof Parzyszek 2018-07-30 14:28:02 +00:00
parent 898cd398d3
commit 24fae50905
4 changed files with 311 additions and 3 deletions

View File

@ -1777,6 +1777,7 @@ namespace {
const BitTracker::RegisterCell &RC);
bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);
// Cache of created instructions to avoid creating duplicates.
// XXX Currently only used by genBitSplit.
@ -2567,6 +2568,127 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
return Changed;
}
// Simplify "RD = A4_rcmpeqi Rs, 0" and "RD = A4_rcmpneqi Rs, 0".
//
// Two situations are recognized:
//  1. The bit tracker knows that the inspected bits of Rs are all zero, or
//     that at least one of them is one. The compare is then replaced with
//     an A2_tfrsi of the constant result (0 or 1).
//  2. Rs (used without a subregister) is defined by a C2_muxii. If both mux
//     inputs are known to be zero or known to be non-zero, the compare is
//     replaced with either a constant, or with a new C2_muxii over the same
//     predicate register that selects between 0 and 1.
//
// MI is the candidate instruction and RD is the register it defines.
// Returns true if a replacement was generated: all uses of RD.Reg are
// redirected to a newly created virtual register, and a cell for that
// register is recorded in the bit tracker. Returns false (changing nothing)
// otherwise. MI itself is not erased here.
bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
      BitTracker::RegisterRef RD) {
  const unsigned Opc = MI->getOpcode();
  const bool IsEq = Opc == Hexagon::A4_rcmpeqi;
  if (!IsEq && Opc != Hexagon::A4_rcmpneqi)
    return false;

  // Only comparisons against the immediate 0 are handled.
  const MachineOperand &ImmOp = MI->getOperand(2);
  if (!ImmOp.isImm() || ImmOp.getImm() != 0)
    return false;

  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (!(FRC == &Hexagon::IntRegsRegClass ||
        FRC == &Hexagon::DoubleRegsRegClass))
    return false;
  assert(RD.Sub == 0);

  // New instructions are inserted at MI, or after the PHIs of the block
  // if MI is itself a PHI.
  MachineBasicBlock &B = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);

  // Look up what is known about the bits of the compared register.
  BitTracker::RegisterRef Src = MI->getOperand(1);
  if (!BT.has(Src.Reg))
    return false;
  const BitTracker::RegisterCell &SrcCell = BT.lookup(Src.Reg);
  unsigned First, Width;
  if (!HBS::getSubregMask(Src, First, Width, MRI))
    return false;

  // AllZero: every inspected bit is a known 0.
  // AnyOne:  at least one inspected bit is a known 1.
  bool AllZero = true;
  bool AnyOne = false;
  for (uint16_t Bit = First; Bit != First+Width; ++Bit) {
    const BitTracker::BitValue &BV = SrcCell[Bit];
    AllZero = AllZero && BV.is(0);
    AnyOne = AnyOne || BV.is(1);
  }

  // Emit "NewR = A2_tfrsi C", redirect the uses of RD.Reg to NewR, and
  // record the bits of C as the cell of NewR.
  auto ReplaceWithConst = [&] (int C) {
    unsigned NewR = MRI.createVirtualRegister(FRC);
    BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
      .addImm(C);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    BitTracker::RegisterCell ConstCell(Width);
    unsigned Bits = unsigned(C);
    for (uint16_t I = 0; I != Width; ++I, Bits >>= 1)
      ConstCell[I] = BitTracker::BitValue((Bits & 1) != 0);
    BT.put(BitTracker::RegisterRef(NewR), ConstCell);
    return true;
  };

  // Classification of an operand as a known zero, a known non-zero, or
  // neither. Global addresses and block addresses are treated as non-zero;
  // plain, constant-int and FP immediates are classified by their value.
  enum class Zeroness { Unknown, Zero, NonZero };
  auto Classify = [] (const MachineOperand &Op) -> Zeroness {
    if (Op.isGlobal() || Op.isBlockAddress())
      return Zeroness::NonZero;
    bool ValueIsZero;
    if (Op.isImm())
      ValueIsZero = Op.getImm() == 0;
    else if (Op.isCImm())
      ValueIsZero = Op.getCImm()->isZero();
    else if (Op.isFPImm())
      ValueIsZero = Op.getFPImm()->isZero();
    else
      return Zeroness::Unknown;
    return ValueIsZero ? Zeroness::Zero : Zeroness::NonZero;
  };

  // Case 1: the tracked bits already decide the comparison, so the result
  // is a constant.
  if (AllZero || AnyOne) {
    assert(AllZero != AnyOne && "Register cannot be both 0 and non-0");
    return ReplaceWithConst(AllZero == IsEq);
  }

  // Case 2: the compared value is produced by a C2_muxii, so it can only
  // be one of the two mux inputs.
  MachineInstr *InpDef = MRI.getVRegDef(Src.Reg);
  if (!InpDef)
    return false;
  if (Src.Sub != 0 || InpDef->getOpcode() != Hexagon::C2_muxii)
    return false;

  const Zeroness In1 = Classify(InpDef->getOperand(2));
  const Zeroness In2 = Classify(InpDef->getOperand(3));

  // Both inputs non-zero: "!= 0" always holds, "== 0" never does.
  if (In1 == Zeroness::NonZero && In2 == Zeroness::NonZero)
    return ReplaceWithConst(!IsEq);
  // Both inputs zero: "== 0" always holds, "!= 0" never does.
  if (In1 == Zeroness::Zero && In2 == Zeroness::Zero)
    return ReplaceWithConst(IsEq);

  // Without full knowledge of both inputs nothing more can be done.
  if (In1 == Zeroness::Unknown || In2 == Zeroness::Unknown)
    return false;

  // One input is zero and the other is not: the comparison result depends
  // only on the predicate, so select between 0 and 1 with a new C2_muxii
  // that uses the same predicate register.
  unsigned NewR = MRI.createVirtualRegister(FRC);
  BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
    .addReg(InpDef->getOperand(1).getReg())
    .addImm((In1 == Zeroness::Zero) == IsEq)
    .addImm((In2 == Zeroness::Zero) == IsEq);
  HBS::replaceReg(RD.Reg, NewR, MRI);
  // The result is 0 or 1: only the least significant bit is unknown, all
  // higher bits are zero.
  BitTracker::RegisterCell MuxCell(Width);
  MuxCell[0] = BitTracker::BitValue::self();
  MuxCell.fill(1, Width, BitTracker::BitValue::Zero);
  BT.put(BitTracker::RegisterRef(NewR), MuxCell);
  return true;
}
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
@ -2615,6 +2737,7 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
T = T || genExtractHalf(MI, RD, RC);
T = T || genCombineHalf(MI, RD, RC);
T = T || genExtractLow(MI, RD, RC);
T = T || simplifyRCmp0(MI, RD);
Changed |= T;
continue;
}

View File

@ -347,9 +347,11 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
return rr0(RC, Outputs);
}
case C2_tfrrp: {
RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
W0 = 8; // XXX Pred size
return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs);
uint16_t RW = W0;
uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
RegisterCell RC = RegisterCell::self(Reg[0].Reg, RW);
RC.fill(PW, RW, BT::BitValue::Zero);
return rr0(eINS(RC, eXTR(rc(1), 0, PW), 0), Outputs);
}
// Arithmetic:
@ -950,6 +952,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
default:
// For instructions that define a single predicate register, store
// the low 8 bits of the register only.
if (unsigned DefR = getUniqueDefVReg(MI)) {
if (MRI.getRegClass(DefR) == &Hexagon::PredRegsRegClass) {
BT::RegisterRef PD(DefR, 0);
uint16_t RW = getRegBitWidth(PD);
uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]);
RegisterCell RC = RegisterCell::self(DefR, RW);
RC.fill(PW, RW, BT::BitValue::Zero);
putCell(PD, RC, Outputs);
return true;
}
}
return MachineEvaluator::evaluate(MI, Inputs, Outputs);
}
#undef im
@ -1016,6 +1031,21 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
return true;
}
// Return the virtual register defined by MI, provided that exactly one of
// MI's operands is a def of a virtual register. Return 0 if MI has no such
// operand, or has more than one. Defs of non-virtual registers are ignored.
unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const {
  unsigned UniqueDef = 0;
  for (const MachineOperand &MO : MI.operands()) {
    const bool IsVRegDef =
        MO.isReg() && MO.isDef() &&
        TargetRegisterInfo::isVirtualRegister(MO.getReg());
    if (!IsVRegDef)
      continue;
    // A second virtual-register def means the def is not unique.
    if (UniqueDef != 0)
      return 0;
    UniqueDef = MO.getReg();
  }
  return UniqueDef;
}
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {

View File

@ -49,6 +49,7 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator {
const HexagonInstrInfo &TII;
private:
unsigned getUniqueDefVReg(const MachineInstr &MI) const;
bool evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs,
CellMapType &Outputs) const;
bool evaluateFormalCopy(const MachineInstr &MI, const CellMapType &Inputs,

View File

@ -0,0 +1,154 @@
# RUN: llc -march=hexagon -run-pass hexagon-bit-simplify -o - %s | FileCheck %s
--- |
@g0 = global i32 0, align 4
define i32 @f0() { ret i32 0 }
define i32 @f1() { ret i32 0 }
define i32 @f2() { ret i32 0 }
define i32 @f3() { ret i32 0 }
define i32 @f4() { ret i32 0 }
define i32 @f5() { ret i32 0 }
define i32 @f6() { ret i32 0 }
define i32 @f7() { ret i32 0 }
...
# Case 0: is-zero with known zero register
# CHECK-LABEL: name: f0
# CHECK: %[[R00:[0-9]+]]:intregs = A2_tfrsi 1
# CHECK: $r0 = COPY %[[R00]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f0
tracksRegLiveness: true
body: |
bb.0:
%0:intregs = A2_tfrsi 0
%2:intregs = A4_rcmpeqi killed %0, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
# Case 1: is-zero with known non-zero register
# CHECK-LABEL: name: f1
# CHECK: %[[R10:[0-9]+]]:intregs = A2_tfrsi 0
# CHECK: $r0 = COPY %[[R10]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f1
tracksRegLiveness: true
body: |
bb.0:
%0:intregs = A2_tfrsi 128
%2:intregs = A4_rcmpeqi killed %0, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
# Case 2: is-not-zero with known zero register
# CHECK-LABEL: name: f2
# CHECK: %[[R20:[0-9]+]]:intregs = A2_tfrsi 0
# CHECK: $r0 = COPY %[[R20]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f2
tracksRegLiveness: true
body: |
bb.0:
%0:intregs = A2_tfrsi 0
%2:intregs = A4_rcmpneqi killed %0, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
# Case 3: is-not-zero with known non-zero register
# CHECK-LABEL: name: f3
# CHECK: %[[R30:[0-9]+]]:intregs = A2_tfrsi 1
# CHECK: $r0 = COPY %[[R30]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f3
tracksRegLiveness: true
body: |
bb.0:
%0:intregs = A2_tfrsi 1024
%2:intregs = A4_rcmpneqi killed %0, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
# Case 4: is-zero with mux(p, 1, 0)
# CHECK-LABEL: name: f4
# CHECK: %[[R40:[0-9]+]]:predregs = COPY $p0
# CHECK: %[[R41:[0-9]+]]:intregs = C2_muxii %[[R40]], 0, 1
# CHECK: $r0 = COPY %[[R41]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f4
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0
%0:predregs = COPY $p0
%1:intregs = C2_muxii %0, 1, 0
%2:intregs = A4_rcmpeqi killed %1, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
# Case 5: is-zero with mux(p, 0, 1)
# CHECK-LABEL: name: f5
# CHECK: %[[R50:[0-9]+]]:predregs = COPY $p0
# CHECK: %[[R51:[0-9]+]]:intregs = C2_muxii %[[R50]], 1, 0
# CHECK: $r0 = COPY %[[R51]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f5
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0
%0:predregs = COPY $p0
%1:intregs = C2_muxii %0, 0, 1
%2:intregs = A4_rcmpeqi killed %1, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
# Case 6: is-not-zero with mux(p, 1, 2)
# CHECK-LABEL: name: f6
# CHECK: %[[R60:[0-9]+]]:intregs = A2_tfrsi 1
# CHECK: $r0 = COPY %[[R60]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f6
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0
%0:predregs = COPY $p0
%1:intregs = C2_muxii %0, 1, 2
%2:intregs = A4_rcmpneqi killed %1, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
# Case 7: is-not-zero with mux(p, @g0, 2)
# CHECK-LABEL: name: f7
# CHECK: %[[R70:[0-9]+]]:intregs = A2_tfrsi 1
# CHECK: $r0 = COPY %[[R70]]
# CHECK: PS_jmpret $r31, implicit-def dead $pc, implicit $r0
name: f7
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0
%0:predregs = COPY $p0
%1:intregs = C2_muxii %0, @g0, 2
%2:intregs = A4_rcmpneqi killed %1, 0
$r0 = COPY %2
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...