From feb9f4243bf6f8f6b9b554f78a81b959567ec040 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Thu, 9 Jun 2016 07:39:08 +0000 Subject: [PATCH] [Thumb] Select a BIC instead of AND if the immediate can be encoded more optimally negated If an immediate is only used in an AND node, it is possible that the immediate can be more optimally materialized when negated. If this is the case, we can negate the immediate and use a BIC instead; int i(int a) { return a & 0xfffffeec; } Used to produce: ldr r1, [CONSTPOOL] ands r0, r1 CONSTPOOL: 0xfffffeec And now produces: movs r1, #255 adds r1, #20 ; Less costly immediate generation bics r0, r1 llvm-svn: 272251 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 41 ++++++++++++++++++++++++- llvm/test/CodeGen/Thumb/bic_imm.ll | 15 +++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Thumb/bic_imm.ll diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index cc835df15b30..a17c8ff0e10d 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2820,6 +2820,45 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (tryV6T2BitfieldExtractOp(N, false)) return; + // If an immediate is used in an AND node, it is possible that the immediate + // can be more optimally materialized when negated. If this is the case we + // can negate the immediate and use a BIC instead. + auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { + uint32_t Imm = (uint32_t) N1C->getZExtValue(); + + // In Thumb2 mode, an AND can take a 12-bit immediate. If this + // immediate can be negated and fit in the immediate operand of + // a t2BIC, don't do any manual transform here as this can be + // handled by the generic ISel machinery.
+ bool PreferImmediateEncoding = + Subtarget->hasThumb2() && !is_t2_so_imm(Imm) && is_t2_so_imm_not(Imm); + if (!PreferImmediateEncoding && + ConstantMaterializationCost(Imm) > + ConstantMaterializationCost(~Imm)) { + // The current immediate costs more to materialize than a negated + // immediate, so negate the immediate and use a BIC. + SDValue NewImm = + CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); + CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); + + if (!Subtarget->hasThumb2()) { + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), + N->getOperand(0), NewImm, getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32)}; + ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); + return; + } else { + SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32)}; + ReplaceNode(N, + CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); + return; + } + } + } + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits // of c1 are 0xffff, and lower 16-bit of c2 are 0. 
That is, the top 16-bits // are entirely contributed by c2 and lower 16-bits are entirely contributed @@ -2834,7 +2873,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (!Opc) break; SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + N1C = dyn_cast<ConstantSDNode>(N1); if (!N1C) break; if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { diff --git a/llvm/test/CodeGen/Thumb/bic_imm.ll b/llvm/test/CodeGen/Thumb/bic_imm.ll new file mode 100644 index 000000000000..6a48cb6f63b3 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/bic_imm.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m0 -verify-machineinstrs | FileCheck --check-prefix CHECK-T1 %s +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m3 -verify-machineinstrs | FileCheck --check-prefix CHECK-T2 %s + +; CHECK-T1-LABEL: @i +; CHECK-T2-LABEL: @i +; CHECK-T1: movs r1, #255 +; CHECK-T1: adds r1, #20 +; CHECK-T1: bics r0, r1 +; CHECK-T2: movw r1, #275 +; CHECK-T2: bics r0, r1 +define i32 @i(i32 %a) { +entry: + %and = and i32 %a, -276 + ret i32 %and +}