Emit a left-shift instead of a power-of-two multiply for jump-tables
Summary: SelectionDAGLegalize::ExpandNode() inserts an ISD::MUL when lowering a BR_JT opcode. While many backends optimize this multiply into a shift, some do not: e.g. the MIPS backend currently always lowers it into a sequence of load-immediate+multiply+mflo in MipsSETargetLowering::lowerMulDiv(). I initially changed the multiply to a shift in the MIPS backend, but it turns out that would not have handled the MIPSR6 case and was a lot more code than doing it in LegalizeDAG. I believe performing this simple optimization in LegalizeDAG instead of in each individual backend is the better solution, since this also fixes other backends such as MSP430, which calls the multiply runtime function __mspabi_mpyi without this patch. Reviewers: sdardis, atanasyan, pftbest, asl Reviewed By: sdardis Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D45760 llvm-svn: 332439
This commit is contained in:
parent
85e38ee18e
commit
8f44579d0b
|
@ -3688,8 +3688,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
|||
unsigned EntrySize =
|
||||
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
|
||||
|
||||
Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
|
||||
DAG.getConstant(EntrySize, dl, Index.getValueType()));
|
||||
// For power-of-two jumptable entry sizes convert multiplication to a shift.
|
||||
// This transformation needs to be done here since otherwise the MIPS
|
||||
// backend will end up emitting a three instruction multiply sequence
|
||||
// instead of a single shift and MSP430 will call a runtime function.
|
||||
if (llvm::isPowerOf2_32(EntrySize))
|
||||
Index = DAG.getNode(
|
||||
ISD::SHL, dl, Index.getValueType(), Index,
|
||||
DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
|
||||
else
|
||||
Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
|
||||
DAG.getConstant(EntrySize, dl, Index.getValueType()));
|
||||
SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
|
||||
Index, Table);
|
||||
|
||||
|
|
|
@ -7,13 +7,16 @@ target triple = "msp430---elf"
|
|||
define i16 @test(i16 %i) #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: sub.w #4, r1
|
||||
; CHECK-NEXT: mov.w r12, 0(r1)
|
||||
; CHECK-NEXT: cmp.w #4, r12
|
||||
; CHECK-NEXT: jhs .LBB0_3
|
||||
%retval = alloca i16, align 2
|
||||
%i.addr = alloca i16, align 2
|
||||
store i16 %i, i16* %i.addr, align 2
|
||||
%0 = load i16, i16* %i.addr, align 2
|
||||
; CHECK: mov.w #2, r13
|
||||
; CHECK: call #__mspabi_mpyi
|
||||
; CHECK: br .LJTI0_0(r12)
|
||||
; CHECK: rla.w r12
|
||||
; CHECK-NEXT: br .LJTI0_0(r12)
|
||||
switch i16 %0, label %sw.default [
|
||||
i16 0, label %sw.bb
|
||||
i16 1, label %sw.bb1
|
||||
|
|
|
@ -28,7 +28,8 @@ entry:
|
|||
; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
|
||||
; PIC-O32: jr $[[R5]]
|
||||
|
||||
; STATIC-N64: mflo $[[R0:[0-9]]]
|
||||
; STATIC-N64: dsrl $[[I32:[0-9]]], ${{[0-9]+}}, 32
|
||||
; STATIC-N64: dsll $[[R0:[0-9]]], $[[I32]], 3
|
||||
; STATIC-N64: lui $[[R1:[0-9]]], %highest(.LJTI0_0)
|
||||
; STATIC-N64: daddiu $[[R2:[0-9]]], $[[R1]], %higher(.LJTI0_0)
|
||||
; STATIC-N64: dsll $[[R3:[0-9]]], $[[R2]], 16
|
||||
|
|
|
@ -161,9 +161,7 @@ define i8* @_Z3fooi(i32 signext %Letter) {
|
|||
; MIPS64R2-NEXT: beqz $1, .LBB0_3
|
||||
; MIPS64R2-NEXT: nop
|
||||
; MIPS64R2-NEXT: .LBB0_1: # %entry
|
||||
; MIPS64R2-NEXT: daddiu $1, $zero, 8
|
||||
; MIPS64R2-NEXT: dmult $2, $1
|
||||
; MIPS64R2-NEXT: mflo $1
|
||||
; MIPS64R2-NEXT: dsll $1, $2, 3
|
||||
; MIPS64R2-NEXT: lui $2, %highest(.LJTI0_0)
|
||||
; MIPS64R2-NEXT: daddiu $2, $2, %higher(.LJTI0_0)
|
||||
; MIPS64R2-NEXT: dsll $2, $2, 16
|
||||
|
@ -481,9 +479,7 @@ define i8* @_Z3fooi(i32 signext %Letter) {
|
|||
; PIC-MIPS64R2-NEXT: beqz $1, .LBB0_3
|
||||
; PIC-MIPS64R2-NEXT: nop
|
||||
; PIC-MIPS64R2-NEXT: .LBB0_1: # %entry
|
||||
; PIC-MIPS64R2-NEXT: daddiu $1, $zero, 8
|
||||
; PIC-MIPS64R2-NEXT: dmult $3, $1
|
||||
; PIC-MIPS64R2-NEXT: mflo $1
|
||||
; PIC-MIPS64R2-NEXT: dsll $1, $3, 3
|
||||
; PIC-MIPS64R2-NEXT: ld $3, %got_page(.LJTI0_0)($2)
|
||||
; PIC-MIPS64R2-NEXT: daddu $1, $1, $3
|
||||
; PIC-MIPS64R2-NEXT: ld $1, %got_ofst(.LJTI0_0)($1)
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; We used to generate a mul+mflo sequence instead of shifting by 2/3 to get the jump table address
|
||||
; RUN: llc %s -O2 -mtriple=mips64-unknown-freebsd -target-abi n64 -relocation-model=pic -o - | FileCheck %s
|
||||
|
||||
define i64 @test(i64 %arg) {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lui $1, %hi(%neg(%gp_rel(test)))
|
||||
; CHECK-NEXT: daddu $2, $1, $25
|
||||
; CHECK-NEXT: sltiu $1, $4, 11
|
||||
; CHECK-NEXT: beqz $1, .LBB0_3
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: .LBB0_1: # %entry
|
||||
; CHECK-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test)))
|
||||
; CHECK-NEXT: dsll $2, $4, 3
|
||||
; Previously this dsll was the following sequence:
|
||||
; daddiu $2, $zero, 8
|
||||
; dmult $4, $2
|
||||
; mflo $2
|
||||
; CHECK-NEXT: ld $3, %got_page(.LJTI0_0)($1)
|
||||
; CHECK-NEXT: daddu $2, $2, $3
|
||||
; CHECK-NEXT: ld $2, %got_ofst(.LJTI0_0)($2)
|
||||
; CHECK-NEXT: daddu $1, $2, $1
|
||||
; CHECK-NEXT: jr $1
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: .LBB0_2: # %sw.bb
|
||||
; CHECK-NEXT: jr $ra
|
||||
; CHECK-NEXT: daddiu $2, $zero, 1
|
||||
; CHECK-NEXT: .LBB0_3: # %default
|
||||
; CHECK-NEXT: jr $ra
|
||||
; CHECK-NEXT: daddiu $2, $zero, 1234
|
||||
; CHECK-NEXT: .LBB0_4: # %sw.bb1
|
||||
; CHECK-NEXT: jr $ra
|
||||
; CHECK-NEXT: daddiu $2, $zero, 0
|
||||
entry:
|
||||
switch i64 %arg, label %default [
|
||||
i64 0, label %sw.bb
|
||||
i64 3, label %sw.bb
|
||||
i64 5, label %sw.bb
|
||||
i64 10, label %sw.bb1
|
||||
]
|
||||
|
||||
default:
|
||||
ret i64 1234
|
||||
|
||||
sw.bb:
|
||||
ret i64 1
|
||||
|
||||
sw.bb1:
|
||||
ret i64 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: .section .rodata,"a",@progbits
|
||||
; CHECK-NEXT: .p2align 3
|
||||
; CHECK-LABEL: .LJTI0_0:
|
||||
; CHECK-NEXT: .gpdword .LBB0_2
|
||||
; CHECK-NEXT: .gpdword .LBB0_3
|
||||
; CHECK-NEXT: .gpdword .LBB0_3
|
||||
; CHECK-NEXT: .gpdword .LBB0_2
|
||||
; CHECK-NEXT: .gpdword .LBB0_3
|
||||
; CHECK-NEXT: .gpdword .LBB0_2
|
||||
; CHECK-NEXT: .gpdword .LBB0_3
|
||||
; CHECK-NEXT: .gpdword .LBB0_3
|
||||
; CHECK-NEXT: .gpdword .LBB0_3
|
||||
; CHECK-NEXT: .gpdword .LBB0_3
|
||||
; CHECK-NEXT: .gpdword .LBB0_4
|
Loading…
Reference in New Issue