ARM: Add GPR register class excluding LR for use with the ADR instruction.

This improves code generation for jump tables by avoiding the emission of "mov pc, lr", which could fool the processor into believing that this is a return from a function, causing mispredicts. The code generation logic for jump tables uses ADR to materialize the address of the jump target.

Patch by Daniel Stewart!
   

llvm-svn: 190043
This commit is contained in:
Tilmann Scheller 2013-09-05 11:10:31 +00:00
parent f3a5d00491
commit a1787a5835
4 changed files with 99 additions and 3 deletions

View File

@ -1233,7 +1233,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>;
let hasSideEffects = 1 in
def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
def t2LEApcrelJT : t2PseudoInst<(outs jtGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
4, IIC_iALUi,
[]>, Sched<[WriteALU, ReadALU]>;

View File

@ -240,6 +240,14 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
}];
}
// jtGPR - Jump Table General Purpose Registers.
// This class is rGPR with LR removed; rGPR is itself GPR without SP and PC.
// It is used by the Thumb2 instructions to prevent Thumb2 jump tables from
// using LR. The implementation of the jump table uses a "mov pc, rA" type
// instruction to jump into the table. Use of the LR register (as in
// "mov pc, lr") can cause the ARM branch predictor to think it is returning
// from a function instead. This causes a mispredict and a pipeline flush.
def jtGPR : RegisterClass<"ARM", [i32], 32, (sub rGPR, LR)>;
// Thumb registers are R0-R7 normally. Some instructions can still use
// the general GPR register class above (MOV, e.g.)
def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;

View File

@ -152,7 +152,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
MachineRegisterInfo *MRI = &MF.getRegInfo();
MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC,
&ARM::rGPRRegClass,
ARM::gsub_1);
assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass");
const TargetRegisterClass* ConstrainedClass =
MRI->constrainRegClass(SrcReg, TargetClass);
assert(ConstrainedClass && "Couldn't constrain the register class");
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
@ -193,7 +199,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
MachineRegisterInfo *MRI = &MF.getRegInfo();
MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
const TargetRegisterClass* TargetClass = TRI->getMatchingSuperRegClass(RC,
&ARM::rGPRRegClass,
ARM::gsub_1);
assert(TargetClass && "No Matching GPRPair with gsub_1 in rGPRRegClass");
const TargetRegisterClass* ConstrainedClass =
MRI->constrainRegClass(DestReg, TargetClass);
assert(ConstrainedClass && "Couldn't constrain the register class");
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);

View File

@ -0,0 +1,76 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; Regression test body: the FileCheck directives inside @foo require the
; function name to appear in the output and forbid a "mov pc, lr" after it.
; The block names (stmt.unr*, xtraiter) suggest this is a reduced, unrolled
; loop with a remainder dispatch -- presumably machine-generated; do not
; reorder instructions, since the exact shape drives register allocation.
define i32 @foo(i32 %n, i32* nocapture %inp) #0 {
; CHECK: foo
; CHECK-NOT: mov pc, lr
.split:
%0 = icmp sgt i32 %n, 1
%1 = add nsw i32 %n, -1
%loop_guard = icmp sgt i32 %1, 0
%or.cond = and i1 %0, %loop_guard
br i1 %or.cond, label %stmt.preheader, label %loop_exit
stmt.preheader: ; preds = %.split
%adjust_ub = add i32 %n, -2
%scevgep6.gep = getelementptr i32* %inp, i32 1
%2 = icmp sgt i32 %adjust_ub, 0
%adjust_ub.op = add i32 %n, -1
%3 = select i1 %2, i32 %adjust_ub.op, i32 1
; Four-way dispatch on the low two bits of %3. This switch is presumably the
; construct that gets lowered to a jump table (cannot be confirmed from this
; file alone).
%xtraiter = and i32 %3, 3
switch i32 %xtraiter, label %stmt.unr [
i32 0, label %stmt.
i32 1, label %stmt.unr30
i32 2, label %stmt.unr16
i32 3, label %stmt.unr8
]
; Switch targets: the stmt.unr* blocks fall through to one another in a chain
; (stmt.unr -> stmt.unr8 -> stmt.unr16 -> stmt.unr30).
stmt.unr: ; preds = %stmt.preheader
%scevgep6.inc.unr = getelementptr i32* %inp, i32 2
br label %stmt.unr8
stmt.unr8: ; preds = %stmt.preheader, %stmt.unr
%imax.03.reg2mem.0.unr = phi i32 [ 1, %stmt.unr ], [ 0, %stmt.preheader ]
%scevgep6.phi.unr = phi i32* [ %scevgep6.inc.unr, %stmt.unr ], [ %scevgep6.gep, %stmt.preheader ]
%scevgep6.inc.unr15 = getelementptr i32* %scevgep6.phi.unr, i32 1
br label %stmt.unr16
stmt.unr16: ; preds = %stmt.preheader, %stmt.unr8
%imax.03.reg2mem.0.unr17 = phi i32 [ 0, %stmt.unr8 ], [ 0, %stmt.preheader ]
%selv.lcssa.reg2mem.1.unr18 = phi i32 [ 0, %stmt.unr8 ], [ undef, %stmt.preheader ]
%scevgep6.phi.unr19 = phi i32* [ %scevgep6.inc.unr15, %stmt.unr8 ], [ %scevgep6.gep, %stmt.preheader ]
%indvar.unr20 = phi i32 [ 1, %stmt.unr8 ], [ 0, %stmt.preheader ]
%scevgep6.inc.unr27 = getelementptr i32* %scevgep6.phi.unr19, i32 1
br label %stmt.unr30
stmt.unr30: ; preds = %stmt.preheader, %stmt.unr16
%imax.03.reg2mem.0.unr31 = phi i32 [ 1, %stmt.unr16 ], [ 0, %stmt.preheader ]
%selv.lcssa.reg2mem.1.unr32 = phi i32 [ 0, %stmt.unr16 ], [ undef, %stmt.preheader ]
%scevgep6.phi.unr33 = phi i32* [ %scevgep6.inc.unr27, %stmt.unr16 ], [ %scevgep6.gep, %stmt.preheader ]
%indvar.unr34 = phi i32 [ 0, %stmt.unr16 ], [ 1, %stmt.preheader ]
%_p_scalar_.unr36 = load i32* %scevgep6.phi.unr33, align 4
%p_.unr37 = icmp sgt i32 %_p_scalar_.unr36, %imax.03.reg2mem.0.unr31
%scevgep6.inc.unr41 = getelementptr i32* %scevgep6.phi.unr33, i32 1
%4 = icmp ugt i32 %3, 4
br i1 %4, label %stmt., label %loop_exit
; Common exit: returns the value merged from the three incoming edges.
loop_exit: ; preds = %stmt.unr30, %stmt., %.split
%itemp.0.lcssa.reg2mem.0 = phi i32 [ undef, %.split ], [ 1, %stmt.unr30 ], [0, %stmt. ]
ret i32 %itemp.0.lcssa.reg2mem.0
; Main loop body: branches back to itself while %loop_cond.4 holds and
; advances the pointer and induction value by 4 per iteration.
stmt.: ; preds = %stmt.preheader, %stmt.unr30, %stmt.
%imax.03.reg2mem.0 = phi i32 [ %p_selv2.3, %stmt. ], [ 1, %stmt.unr30 ], [ 0, %stmt.preheader ]
%selv.lcssa.reg2mem.1 = phi i32 [ 0, %stmt. ], [ 1, %stmt.unr30 ], [ undef, %stmt.preheader ]
%scevgep6.phi = phi i32* [ %scevgep6.inc.3, %stmt. ], [ %scevgep6.inc.unr41, %stmt.unr30 ], [ %scevgep6.gep, %stmt.preheader ]
%indvar = phi i32 [ %scevgep.sum.3, %stmt. ], [ 1, %stmt.unr30 ], [ 0, %stmt.preheader ]
%scevgep.sum = add i32 %indvar, 1
%_p_scalar_ = load i32* %scevgep6.phi, align 4
%p_ = icmp sgt i32 %_p_scalar_, %imax.03.reg2mem.0
%p_selv = select i1 %p_, i32 %scevgep.sum, i32 %selv.lcssa.reg2mem.1
%scevgep.sum.3 = add i32 %indvar, 4
%p_selv2.3 = select i1 %p_, i32 %_p_scalar_, i32 %p_selv
%scevgep6.inc.3 = getelementptr i32* %scevgep6.phi, i32 4
%loop_cond.4 = icmp slt i32 %scevgep.sum.3, %adjust_ub
br i1 %loop_cond.4, label %stmt., label %loop_exit
}