[Thumb] Reapply r272251 with a fix for PR28348 (mk 2)

The important thing I was missing was ensuring newly added constants were kept in topological order. Repositioning the node is correct if the constant is newly added (so it has no topological ordering) but wrong if it already existed - positioning it next in the worklist would break the topological ordering.

Original commit message:
  [Thumb] Select a BIC instead of AND if the immediate can be encoded more optimally negated

  If an immediate is only used in an AND node, it is possible that the immediate can be more optimally materialized when negated. If this is the case, we can negate the immediate and use a BIC instead;

    int i(int a) {
      return a & 0xfffffeec;
    }

  Used to produce:
      ldr r1, [CONSTPOOL]
      ands r0, r1
    CONSTPOOL: 0xfffffeec

  And now produces:
      movs    r1, #255
      adds    r1, #20  ; Less costly immediate generation
      bics    r0, r1

llvm-svn: 274543
This commit is contained in:
James Molloy 2016-07-05 12:37:13 +00:00
parent 136332888a
commit ae5ff990ae
3 changed files with 86 additions and 1 deletions

View File

@ -2820,6 +2820,48 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (tryV6T2BitfieldExtractOp(N, false))
return;
// If an immediate is used in an AND node, it is possible that the immediate
// can be more optimally materialized when negated. If this is the case we
// can negate the immediate and use a BIC instead.
auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
uint32_t Imm = (uint32_t) N1C->getZExtValue();
// In Thumb2 mode, an AND can take a 12-bit immediate. If this
// immediate can be negated and fit in the immediate operand of
// a t2BIC, don't do any manual transform here as this can be
// handled by the generic ISel machinery.
bool PreferImmediateEncoding =
Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
if (!PreferImmediateEncoding &&
ConstantMaterializationCost(Imm) >
ConstantMaterializationCost(~Imm)) {
// The current immediate costs more to materialize than a negated
// immediate, so negate the immediate and use a BIC.
SDValue NewImm =
CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
// If the new constant didn't exist before, reposition it in the topological
// ordering so it is just before N. Otherwise, don't touch its location.
if (NewImm->getNodeId() == -1)
CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
if (!Subtarget->hasThumb2()) {
SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
N->getOperand(0), NewImm, getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
return;
} else {
SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32)};
ReplaceNode(N,
CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
return;
}
}
}
// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
// are entirely contributed by c2 and lower 16-bits are entirely contributed
@ -2834,7 +2876,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (!Opc)
break;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
N1C = dyn_cast<ConstantSDNode>(N1);
if (!N1C)
break;
if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {

View File

@ -0,0 +1,26 @@
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m0 -verify-machineinstrs | FileCheck --check-prefix CHECK-T1 %s
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m3 -verify-machineinstrs | FileCheck --check-prefix CHECK-T2 %s
; CHECK-T1-LABEL: @i
; CHECK-T2-LABEL: @i
; CHECK-T1: movs r1, #255
; CHECK-T1: adds r1, #20
; CHECK-T1: bics r0, r1
; CHECK-T2: movw r1, #275
; CHECK-T2: bics r0, r1
define i32 @i(i32 %a) {
entry:
%and = and i32 %a, -276
ret i32 %and
}
; CHECK-T1-LABEL: @j
; CHECK-T2-LABEL: @j
; CHECK-T1: movs r1, #128
; CHECK-T1: bics r0, r1
; CHECK-T2: bic r0, r0, #128
define i32 @j(i32 %a) {
entry:
%and = and i32 %a, -129
ret i32 %and
}

View File

@ -0,0 +1,17 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7--linux-gnueabihf"
; CHECK-LABEL: f:
; CHECK: bic
define void @f(i32* nocapture %b, i32* nocapture %c, i32 %a) {
%1 = and i32 %a, -4096
store i32 %1, i32* %c, align 4
%2 = and i32 %a, 4095
%3 = or i32 %2, 4096
%4 = load i32, i32* %b, align 4
%5 = add nsw i32 %4, %3
store i32 %5, i32* %b, align 4
ret void
}