[FastISel][AArch64] Custom lower sdiv by power-of-2.

Emit an optimized instruction sequence for sdiv by a power of 2, depending on
whether the exact flag is set.

This fixes rdar://problem/18224511.

llvm-svn: 217986
Juergen Ributzka 2014-09-17 21:55:55 +00:00
parent 3e95fa431e
commit f6430314b4
2 changed files with 128 additions and 0 deletions
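For context, here is a minimal sketch (not taken from the patch; the helper
names are made up) of the arithmetic the lowering relies on. It assumes an
arithmetic right shift for negative signed values, as AArch64's asr provides,
and a shift amount small enough that 1 << k does not overflow:

#include <cstdint>
#include <cstdio>

// sdiv exact x, 2^k: the exact flag promises there is no remainder, so a
// plain arithmetic shift suffices (the lone "asr" in sdiv_i32_exact below).
int32_t sdivExactPow2(int32_t x, unsigned k) { return x >> k; }

// sdiv x, 2^k without exact: bias negative inputs by 2^k - 1 so the shift
// rounds toward zero instead of toward negative infinity
// (the add/cmp/csel/asr sequence checked in the tests).
int32_t sdivPow2(int32_t x, unsigned k) {
  int32_t Biased = x < 0 ? x + ((1 << k) - 1) : x;
  return Biased >> k;
}

// sdiv x, -2^k: same as above, then negate the shifted value
// (the "neg ..., asr #k" checked in the tests).
int32_t sdivNegPow2(int32_t x, unsigned k) {
  int32_t Biased = x < 0 ? x + ((1 << k) - 1) : x;
  return -(Biased >> k);
}

int main() {
  // -5/8 = 0, -8/8 = -1, -16/-8 = 2 (all round toward zero).
  std::printf("%d %d %d\n", sdivPow2(-5, 3), sdivPow2(-8, 3),
              sdivNegPow2(-16, 3));
}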


@@ -133,6 +133,7 @@ private:
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
@@ -3980,6 +3981,75 @@ bool AArch64FastISel::selectFRem(const Instruction *I) {
  return true;
}

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || (-C).isPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countTrailingZeros();
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;
  bool Src0IsKill = hasTrivialKill(I->getOperand(0));

  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  unsigned Pow2MinusOne = (1 << Lg2) - 1;
  unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
                                  /*IsKill=*/false, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  unsigned SelectReg =
      fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
                       Src0IsKill, AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must
  // also negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
                              SelectReg, /*IsKill=*/true, AArch64_AM::ASR,
                              Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:

@@ -3989,6 +4059,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);


@@ -0,0 +1,56 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s

define i32 @sdiv_i32_exact(i32 %a) {
; CHECK-LABEL: sdiv_i32_exact
; CHECK: asr {{w[0-9]+}}, w0, #3
  %1 = sdiv exact i32 %a, 8
  ret i32 %1
}

define i32 @sdiv_i32_pos(i32 %a) {
; CHECK-LABEL: sdiv_i32_pos
; CHECK: add [[REG1:w[0-9]+]], w0, #7
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
; CHECK-NEXT: asr {{w[0-9]+}}, [[REG2]], #3
  %1 = sdiv i32 %a, 8
  ret i32 %1
}

define i32 @sdiv_i32_neg(i32 %a) {
; CHECK-LABEL: sdiv_i32_neg
; CHECK: add [[REG1:w[0-9]+]], w0, #7
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
; CHECK-NEXT: neg {{w[0-9]+}}, [[REG2]], asr #3
  %1 = sdiv i32 %a, -8
  ret i32 %1
}

define i64 @sdiv_i64_exact(i64 %a) {
; CHECK-LABEL: sdiv_i64_exact
; CHECK: asr {{x[0-9]+}}, x0, #4
  %1 = sdiv exact i64 %a, 16
  ret i64 %1
}

define i64 @sdiv_i64_pos(i64 %a) {
; CHECK-LABEL: sdiv_i64_pos
; CHECK: add [[REG1:x[0-9]+]], x0, #15
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
; CHECK-NEXT: asr {{x[0-9]+}}, [[REG2]], #4
  %1 = sdiv i64 %a, 16
  ret i64 %1
}

define i64 @sdiv_i64_neg(i64 %a) {
; CHECK-LABEL: sdiv_i64_neg
; CHECK: add [[REG1:x[0-9]+]], x0, #15
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
; CHECK-NEXT: neg {{x[0-9]+}}, [[REG2]], asr #4
  %1 = sdiv i64 %a, -16
  ret i64 %1
}