Emit a left-shift instead of a power-of-two multiply for jump-tables

Summary:
SelectionDAGLegalize::ExpandNode() inserts an ISD::MUL when lowering a
BR_JT opcode. While many backends optimize this multiply into a shift, not
all of them do: e.g. the MIPS backend currently always lowers it into a
sequence of load-immediate+multiply+mflo in
MipsSETargetLowering::lowerMulDiv().

I initially changed the multiply to a shift in the MIPS backend but it
turns out that would not have handled the MIPSR6 case and was a lot more
code than doing it in LegalizeDAG.
I believe performing this simple optimization in LegalizeDAG instead of
each individual backend is the better solution, since this also fixes other
backends such as MSP430, which calls the multiply runtime function
__mspabi_mpyi without this patch.

Reviewers: sdardis, atanasyan, pftbest, asl

Reviewed By: sdardis

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D45760

llvm-svn: 332439
This commit is contained in:
Alexander Richardson 2018-05-16 08:58:26 +00:00
parent 85e38ee18e
commit 8f44579d0b
5 changed files with 87 additions and 12 deletions

View File

@ -3688,8 +3688,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
DAG.getConstant(EntrySize, dl, Index.getValueType()));
// For power-of-two jumptable entry sizes convert multiplication to a shift.
// This transformation needs to be done here since otherwise the MIPS
// backend will end up emitting a three instruction multiply sequence
// instead of a single shift and MSP430 will call a runtime function.
if (llvm::isPowerOf2_32(EntrySize))
Index = DAG.getNode(
ISD::SHL, dl, Index.getValueType(), Index,
DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
else
Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
DAG.getConstant(EntrySize, dl, Index.getValueType()));
SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
Index, Table);

View File

@ -7,13 +7,16 @@ target triple = "msp430---elf"
define i16 @test(i16 %i) #0 {
entry:
; CHECK-LABEL: test:
; CHECK: sub.w #4, r1
; CHECK-NEXT: mov.w r12, 0(r1)
; CHECK-NEXT: cmp.w #4, r12
; CHECK-NEXT: jhs .LBB0_3
%retval = alloca i16, align 2
%i.addr = alloca i16, align 2
store i16 %i, i16* %i.addr, align 2
%0 = load i16, i16* %i.addr, align 2
; CHECK: mov.w #2, r13
; CHECK: call #__mspabi_mpyi
; CHECK: br .LJTI0_0(r12)
; CHECK: rla.w r12
; CHECK-NEXT: br .LJTI0_0(r12)
switch i16 %0, label %sw.default [
i16 0, label %sw.bb
i16 1, label %sw.bb1

View File

@ -28,7 +28,8 @@ entry:
; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
; PIC-O32: jr $[[R5]]
; STATIC-N64: mflo $[[R0:[0-9]]]
; STATIC-N64: dsrl $[[I32:[0-9]]], ${{[0-9]+}}, 32
; STATIC-N64: dsll $[[R0:[0-9]]], $[[I32]], 3
; STATIC-N64: lui $[[R1:[0-9]]], %highest(.LJTI0_0)
; STATIC-N64: daddiu $[[R2:[0-9]]], $[[R1]], %higher(.LJTI0_0)
; STATIC-N64: dsll $[[R3:[0-9]]], $[[R2]], 16

View File

@ -161,9 +161,7 @@ define i8* @_Z3fooi(i32 signext %Letter) {
; MIPS64R2-NEXT: beqz $1, .LBB0_3
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: .LBB0_1: # %entry
; MIPS64R2-NEXT: daddiu $1, $zero, 8
; MIPS64R2-NEXT: dmult $2, $1
; MIPS64R2-NEXT: mflo $1
; MIPS64R2-NEXT: dsll $1, $2, 3
; MIPS64R2-NEXT: lui $2, %highest(.LJTI0_0)
; MIPS64R2-NEXT: daddiu $2, $2, %higher(.LJTI0_0)
; MIPS64R2-NEXT: dsll $2, $2, 16
@ -481,9 +479,7 @@ define i8* @_Z3fooi(i32 signext %Letter) {
; PIC-MIPS64R2-NEXT: beqz $1, .LBB0_3
; PIC-MIPS64R2-NEXT: nop
; PIC-MIPS64R2-NEXT: .LBB0_1: # %entry
; PIC-MIPS64R2-NEXT: daddiu $1, $zero, 8
; PIC-MIPS64R2-NEXT: dmult $3, $1
; PIC-MIPS64R2-NEXT: mflo $1
; PIC-MIPS64R2-NEXT: dsll $1, $3, 3
; PIC-MIPS64R2-NEXT: ld $3, %got_page(.LJTI0_0)($2)
; PIC-MIPS64R2-NEXT: daddu $1, $1, $3
; PIC-MIPS64R2-NEXT: ld $1, %got_ofst(.LJTI0_0)($1)

View File

@ -0,0 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; We used to generate a mul+mflo sequence instead of shifting by 2/3 to get the jump table address
; RUN: llc %s -O2 -mtriple=mips64-unknown-freebsd -target-abi n64 -relocation-model=pic -o - | FileCheck %s
; Regression test for jump-table index scaling on MIPS64 N64 PIC.
; The switch below is expected to be lowered through jump table .LJTI0_0;
; the CHECK lines pin the index scaling to a single `dsll ... 3` (entry
; size 8) rather than a multiply sequence.
define i64 @test(i64 %arg) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui $1, %hi(%neg(%gp_rel(test)))
; CHECK-NEXT: daddu $2, $1, $25
; CHECK-NEXT: sltiu $1, $4, 11
; CHECK-NEXT: beqz $1, .LBB0_3
; CHECK-NEXT: nop
; CHECK-NEXT: .LBB0_1: # %entry
; CHECK-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test)))
; CHECK-NEXT: dsll $2, $4, 3
; Previously this dsll was the following sequence:
; daddiu $2, $zero, 8
; dmult $4, $2
; mflo $2
; CHECK-NEXT: ld $3, %got_page(.LJTI0_0)($1)
; CHECK-NEXT: daddu $2, $2, $3
; CHECK-NEXT: ld $2, %got_ofst(.LJTI0_0)($2)
; CHECK-NEXT: daddu $1, $2, $1
; CHECK-NEXT: jr $1
; CHECK-NEXT: nop
; CHECK-NEXT: .LBB0_2: # %sw.bb
; CHECK-NEXT: jr $ra
; CHECK-NEXT: daddiu $2, $zero, 1
; CHECK-NEXT: .LBB0_3: # %default
; CHECK-NEXT: jr $ra
; CHECK-NEXT: daddiu $2, $zero, 1234
; CHECK-NEXT: .LBB0_4: # %sw.bb1
; CHECK-NEXT: jr $ra
; CHECK-NEXT: daddiu $2, $zero, 0
entry:
; Case values lie in 0..10, which matches the `sltiu ..., 11` bounds check
; expected above. Values without an explicit case go to %default.
switch i64 %arg, label %default [
i64 0, label %sw.bb
i64 3, label %sw.bb
i64 5, label %sw.bb
i64 10, label %sw.bb1
]
; Each successor returns a distinct constant so the three blocks stay
; distinguishable in the expected assembly (1234, 1 and 0).
default:
ret i64 1234
sw.bb:
ret i64 1
sw.bb1:
ret i64 0
}
; CHECK-LABEL: .section .rodata,"a",@progbits
; CHECK-NEXT: .p2align 3
; CHECK-LABEL: .LJTI0_0:
; CHECK-NEXT: .gpdword .LBB0_2
; CHECK-NEXT: .gpdword .LBB0_3
; CHECK-NEXT: .gpdword .LBB0_3
; CHECK-NEXT: .gpdword .LBB0_2
; CHECK-NEXT: .gpdword .LBB0_3
; CHECK-NEXT: .gpdword .LBB0_2
; CHECK-NEXT: .gpdword .LBB0_3
; CHECK-NEXT: .gpdword .LBB0_3
; CHECK-NEXT: .gpdword .LBB0_3
; CHECK-NEXT: .gpdword .LBB0_3
; CHECK-NEXT: .gpdword .LBB0_4