[FastISel][AArch64] Custom lower sdiv by power-of-2.

Emit an optimized instruction sequence for sdiv by a power of 2, depending on
whether the exact flag is set.

This fixes rdar://problem/18224511.

llvm-svn: 217986
Juergen Ributzka 2014-09-17 21:55:55 +00:00
parent 3e95fa431e
commit f6430314b4
2 changed files with 128 additions and 0 deletions
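For context, here is a minimal sketch (not taken from the patch; the helper
names are made up) of the arithmetic the lowering relies on. It assumes an
arithmetic right shift for negative signed values, as AArch64's asr provides,
and a shift amount small enough that 1 << k does not overflow:

#include <cstdint>
#include <cstdio>

// sdiv exact x, 2^k: the exact flag promises there is no remainder, so a
// plain arithmetic shift suffices (the lone "asr" in sdiv_i32_exact below).
int32_t sdivExactPow2(int32_t x, unsigned k) { return x >> k; }

// sdiv x, 2^k without exact: bias negative inputs by 2^k - 1 so the shift
// rounds toward zero instead of toward negative infinity
// (the add/cmp/csel/asr sequence checked in the tests).
int32_t sdivPow2(int32_t x, unsigned k) {
  int32_t Biased = x < 0 ? x + ((1 << k) - 1) : x;
  return Biased >> k;
}

// sdiv x, -2^k: same as above, then negate the shifted value
// (the "neg ..., asr #k" checked in the tests).
int32_t sdivNegPow2(int32_t x, unsigned k) {
  int32_t Biased = x < 0 ? x + ((1 << k) - 1) : x;
  return -(Biased >> k);
}

int main() {
  // -5/8 = 0, -8/8 = -1, -16/-8 = 2 (all round toward zero).
  std::printf("%d %d %d\n", sdivPow2(-5, 3), sdivPow2(-8, 3),
              sdivNegPow2(-16, 3));
}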


@@ -133,6 +133,7 @@ private:
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
@@ -3980,6 +3981,75 @@ bool AArch64FastISel::selectFRem(const Instruction *I) {
  return true;
}

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || (-C).isPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countTrailingZeros();
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  unsigned Pow2MinusOne = (1 << Lg2) - 1;
  unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
                                  /*IsKill=*/false, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  unsigned SelectReg =
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
                       Src0IsKill, AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must
  // also negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR,
                              Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:

@@ -3989,6 +4059,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);


@@ -0,0 +1,56 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s

define i32 @sdiv_i32_exact(i32 %a) {
; CHECK-LABEL: sdiv_i32_exact
; CHECK: asr {{w[0-9]+}}, w0, #3
  %1 = sdiv exact i32 %a, 8
  ret i32 %1
}

define i32 @sdiv_i32_pos(i32 %a) {
; CHECK-LABEL: sdiv_i32_pos
; CHECK: add [[REG1:w[0-9]+]], w0, #7
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
; CHECK-NEXT: asr {{w[0-9]+}}, [[REG2]], #3
  %1 = sdiv i32 %a, 8
  ret i32 %1
}

define i32 @sdiv_i32_neg(i32 %a) {
; CHECK-LABEL: sdiv_i32_neg
; CHECK: add [[REG1:w[0-9]+]], w0, #7
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
; CHECK-NEXT: neg {{w[0-9]+}}, [[REG2]], asr #3
  %1 = sdiv i32 %a, -8
  ret i32 %1
}

define i64 @sdiv_i64_exact(i64 %a) {
; CHECK-LABEL: sdiv_i64_exact
; CHECK: asr {{x[0-9]+}}, x0, #4
  %1 = sdiv exact i64 %a, 16
  ret i64 %1
}

define i64 @sdiv_i64_pos(i64 %a) {
; CHECK-LABEL: sdiv_i64_pos
; CHECK: add [[REG1:x[0-9]+]], x0, #15
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
; CHECK-NEXT: asr {{x[0-9]+}}, [[REG2]], #4
  %1 = sdiv i64 %a, 16
  ret i64 %1
}

define i64 @sdiv_i64_neg(i64 %a) {
; CHECK-LABEL: sdiv_i64_neg
; CHECK: add [[REG1:x[0-9]+]], x0, #15
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
; CHECK-NEXT: neg {{x[0-9]+}}, [[REG2]], asr #4
  %1 = sdiv i64 %a, -16
  ret i64 %1
}