ARM: convert ORR instructions to ADD where possible on Thumb.

Thumb has more 16-bit encoding space dedicated to ADD than ORR, allowing both a
3-address encoding and a wider range of immediates. So, particularly when
optimizing for code size (though it doesn't make things worse elsewhere), it's
beneficial to select an OR operation as an ADD if we know the operands have no
set bits in common, so that the addition produces no carries and yields the
same result as the OR.

This matters all the more because LLVM's canonicalization tends to convert in
the other direction (ADD to OR), so such OR operations are common.

llvm-svn: 335119
This commit is contained in:
Tim Northover 2018-06-20 12:09:44 +00:00
parent 70666e7765
commit 644a819534
7 changed files with 84 additions and 4 deletions

View File

@ -97,6 +97,8 @@ public:
return SelectImmShifterOperand(N, A, B, false);
}
// Complex-pattern selector for an operand N of an ISD::OR node (Parent). Sets
// Out to N and returns true when N and Parent's operand 1 are known to have no
// set bits in common, i.e. when the OR can be selected as an ADD.
bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
@ -569,6 +571,14 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
return true;
}
// Complex-pattern hook for an operand of an ISD::OR node. Out is always set to
// N; the result is true when N and the OR's operand 1 are known to have no set
// bits in common, in which case the OR can be turned into an addition, which
// often has more compact encodings.
// NOTE(review): the comparison is always made against Parent's operand 1, so
// this presumably expects N to be operand 0 -- confirm for commuted matches.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");

  const SDValue OtherOperand = Parent->getOperand(1);
  Out = N;

  const bool OperandsAreDisjoint =
      CurDAG->haveNoCommonBitsSet(N, OtherOperand);
  return OperandsAreDisjoint;
}
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
SDValue &Base,

View File

@ -270,6 +270,14 @@ def t_addrmode_sp : MemOperand,
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
// Complex pattern for an operand of an `or` node. SDNPWantParent makes the
// selector function (SelectAddLikeOr) receive the parent node as well, so it
// can inspect the parent to determine whether the or instruction can be
// implemented as an add (i.e. whether the operands are known to have no set
// bits in common, so the add produces no carries).
def AddLikeOrOp : ComplexPattern<i32, 1, "SelectAddLikeOr", [],
[SDNPWantParent]>;
// Leaf pattern that matches an i32 GPR operand only when its node is not a
// ConstantSDNode, so that immediates are excluded from matching.
def non_imm32 : PatLeaf<(i32 GPR), [{ return !isa<ConstantSDNode>(N); }]>;
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//
@ -997,6 +1005,15 @@ let isAdd = 1 in {
}
}
// Thumb has more flexible short encodings for ADD than ORR, so use those where
// possible. AddLikeOrOp only matches when the or can be implemented as an add,
// so each pattern below rewrites such an or into the corresponding Thumb1 add:
// the two immediate forms (tADDi3 for imm0_7, tADDi8 for imm8_255) and the
// register-register form (tADDrr).
def : T1Pat<(or AddLikeOrOp:$Rn, imm0_7:$imm), (tADDi3 $Rn, imm0_7:$imm)>;
def : T1Pat<(or AddLikeOrOp:$Rn, imm8_255:$imm), (tADDi8 $Rn, imm8_255:$imm)>;
def : T1Pat<(or AddLikeOrOp:$Rn, tGPR:$Rm), (tADDrr $Rn, $Rm)>;
// Two-operand assembly alias for tADDrr: $Rdn is used both as the destination
// and as the first source register.
def : tInstAlias <"add${s}${p} $Rdn, $Rm",
(tADDrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>;

View File

@ -2594,6 +2594,18 @@ def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
// Select a bare immediate that matches t2_so_imm_not as a t2MVNi.
// NOTE(review): presumably t2_so_imm_not accepts constants whose bitwise
// complement is encodable -- confirm against its definition.
def : T2Pat<(t2_so_imm_not:$src),
(t2MVNi t2_so_imm_not:$src)>;
// There are shorter Thumb encodings for ADD than ORR, so to increase
// Thumb2SizeReduction's chances later on we select a t2ADD for an or where
// possible. AddLikeOrOp only matches when the or can be implemented as an add.

// Immediate operand matching t2_so_imm: select t2ADDri.
def : T2Pat<(or AddLikeOrOp:$Rn, t2_so_imm:$imm),
(t2ADDri $Rn, t2_so_imm:$imm)>;
// Immediate operand matching imm0_4095: select t2ADDri12.
def : T2Pat<(or AddLikeOrOp:$Rn, imm0_4095:$Rm),
(t2ADDri12 $Rn, imm0_4095:$Rm)>;
// Register operand: select t2ADDrr. non_imm32 rejects constant nodes so that
// immediates do not match this register form.
def : T2Pat<(or AddLikeOrOp:$Rn, non_imm32:$Rm),
(t2ADDrr $Rn, $Rm)>;
//===----------------------------------------------------------------------===//
// Multiply Instructions.
//

View File

@ -0,0 +1,41 @@
; RUN: llc -mtriple=thumbv6m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T1
; RUN: llc -mtriple=thumbv7m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
; An or with a small constant (2) whose other operand is known to have that bit
; clear; the expected output is a three-operand adds with immediate #2.
define i32 @test_add_i3(i1 %tst, i32 %a, i32 %b) {
; CHECK-LABEL: test_add_i3:
; CHECK: adds r0, {{r[0-9]+}}, #2
; Both masks clear bit 1 (-7 is ...11111001, -4 is ...11111100), so whichever
; value the select picks has no set bits in common with the constant 2.
%tmp = and i32 %a, -7
%tmp1 = and i32 %b, -4
%int = select i1 %tst, i32 %tmp, i32 %tmp1
; Call to force %int into a register that isn't r0 so using the i3 form is a
; good idea.
call void @foo(i32 %int)
%res = or i32 %int, 2
ret i32 %res
}
; An or with the constant 12 whose other operand is known to have its low bits
; clear; the expected output is an add with immediate #12 on both run lines.
define i32 @test_add_i8(i32 %a, i32 %b, i1 %tst) {
; CHECK-LABEL: test_add_i8:
; CHECK-T1: adds r0, #12
; CHECK-T2: add.w r0, {{r[0-9]+}}, #12
; -256 clears the low 8 bits and -512 the low 9 bits, so either selected value
; has no set bits in common with the constant 12.
%tmp = and i32 %a, -256
%tmp1 = and i32 %b, -512
%int = select i1 %tst, i32 %tmp, i32 %tmp1
%res = or i32 %int, 12
ret i32 %res
}
; An or with the constant 854 whose other operand is known to have its low 12
; bits clear; only the thumbv7m run line has an expectation here (addw).
define i32 @test_add_i12(i32 %a, i32 %b, i1 %tst) {
; CHECK-LABEL: test_add_i12:
; CHECK-T2: addw r0, {{r[0-9]+}}, #854
; -4096 clears the low 12 bits and -8192 the low 13 bits, so either selected
; value has no set bits in common with the constant 854 (which is below 4096).
%tmp = and i32 %a, -4096
%tmp1 = and i32 %b, -8192
%int = select i1 %tst, i32 %tmp, i32 %tmp1
%res = or i32 %int, 854
ret i32 %res
}
declare void @foo(i32)

View File

@ -130,7 +130,7 @@ entry:
; CHECK-V6M: ldrh [[LOW:r[0-9]+]], [r0, #2]
; CHECK-V6M: ldr [[HIGH:r[0-9]+]], [r0, #4]
; CHECK-V6M-NEXT: lsls [[HIGH]], [[HIGH]], #16
; CHECK-V6M-NEXT: orrs r0, r1
; CHECK-V6M-NEXT: adds r0, r1, r0
; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4]
; CHECK-ALIGN-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #2]
; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #16

View File

@ -15,7 +15,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc.
; CHECK: bl _f2
; CHECK: cmp r0, #0
; CHECK: cmp {{r[0-9]+}}, #0
; CHECK-NOT: cmp
; CHECK: InlineAsm Start
define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {

View File

@ -109,13 +109,13 @@ define i32 @test10(i32 %p0) {
; CHECK-DSP: and.w r0, r1, r0, lsr #7
; CHECK-DSP: lsrs r1, r0, #5
; CHECK-DSP: uxtb16 r1, r1
; CHECk-DSP: orrs r0, r1
; CHECk-DSP: adds r0, r1
; CHECK-NO-DSP: mov.w r1, #16253176
; CHECK-NO-DSP: and.w r0, r1, r0, lsr #7
; CHECK-NO-DSP: mov.w r1, #458759
; CHECK-NO-DSP: and.w r1, r1, r0, lsr #5
; CHECK-NO-DSP: orrs r0, r1
; CHECK-NO-DSP: add r0, r1
%tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2]
%tmp4 = lshr i32 %tmp2, 5 ; <i32> [#uses=1]