Re-re-land: Teach CorrelatedValuePropagation to reduce the width of udiv/urem instructions.

Summary:
If the operands of a udiv/urem can be proved to fit within a smaller
power-of-two-sized type, reduce the width of the udiv/urem.

Backed out for causing performance regressions.  Re-landing
because we've determined that these regressions were noise.

Original Differential Revision: https://reviews.llvm.org/D44102

llvm-svn: 328096
This commit is contained in:
Justin Lebar 2018-03-21 14:08:21 +00:00
parent daf69ac2b2
commit 038cbc5c13
3 changed files with 250 additions and 0 deletions

View File

@ -58,6 +58,7 @@ STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
STATISTIC(NumUDivs, "Number of udivs whose width was decreased");
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumOverflows, "Number of overflow checks removed");
@ -432,6 +433,48 @@ static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) {
return true;
}
/// Try to shrink a udiv/urem's width down to the smallest power of two that's
/// sufficient to contain its operands.
static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
assert(Instr->getOpcode() == Instruction::UDiv ||
Instr->getOpcode() == Instruction::URem);
if (Instr->getType()->isVectorTy())
return false;
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
// operands.
auto OrigWidth = Instr->getType()->getIntegerBitWidth();
ConstantRange OperandRange(OrigWidth, /*isFullset=*/false);
for (Value *Operand : Instr->operands()) {
OperandRange = OperandRange.unionWith(
LVI->getConstantRange(Operand, Instr->getParent()));
}
// Don't shrink below 8 bits wide.
unsigned NewWidth = std::max<unsigned>(
PowerOf2Ceil(OperandRange.getUnsignedMax().getActiveBits()), 8);
// NewWidth might be greater than OrigWidth if OrigWidth is not a power of
// two.
if (NewWidth >= OrigWidth)
return false;
++NumUDivs;
auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
auto *LHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(0), TruncTy,
Instr->getName() + ".lhs.trunc", Instr);
auto *RHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(1), TruncTy,
Instr->getName() + ".rhs.trunc", Instr);
auto *BO =
BinaryOperator::Create(Instr->getOpcode(), LHS, RHS, Instr->getName(), Instr);
auto *Zext = CastInst::Create(Instruction::ZExt, BO, Instr->getType(),
Instr->getName() + ".zext", Instr);
if (BO->getOpcode() == Instruction::UDiv)
BO->setIsExact(Instr->isExact());
Instr->replaceAllUsesWith(Zext);
Instr->eraseFromParent();
return true;
}
static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI))
return false;
@ -441,6 +484,10 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
SDI->getName(), SDI);
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
// Try to process our new urem.
processUDivOrURem(BO, LVI);
return true;
}
@ -460,6 +507,9 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
// Try to simplify our new udiv.
processUDivOrURem(BO, LVI);
return true;
}
@ -595,6 +645,10 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) {
case Instruction::SDiv:
BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
break;
case Instruction::UDiv:
case Instruction::URem:
BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI);
break;
case Instruction::AShr:
BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
break;

View File

@ -0,0 +1,95 @@
; RUN: opt < %s -correlated-propagation -S | FileCheck %s
; CHECK-LABEL: @test_nop
define void @test_nop(i32 %n) {
; CHECK udiv i32 %n, 100
%div = udiv i32 %n, 100
ret void
}
; CHECK-LABEL: @test1(
define void @test1(i32 %n) {
entry:
%cmp = icmp ule i32 %n, 65535
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: udiv i16
%div = udiv i32 %n, 100
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test2(
define void @test2(i32 %n) {
entry:
%cmp = icmp ule i32 %n, 65536
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: udiv i32 %n, 100
%div = udiv i32 %n, 100
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test3(
define void @test3(i32 %m, i32 %n) {
entry:
%cmp1 = icmp ult i32 %m, 65535
%cmp2 = icmp ult i32 %n, 65535
%cmp = and i1 %cmp1, %cmp2
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: udiv i16
%div = udiv i32 %m, %n
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test4(
define void @test4(i32 %m, i32 %n) {
entry:
%cmp1 = icmp ult i32 %m, 65535
%cmp2 = icmp ule i32 %n, 65536
%cmp = and i1 %cmp1, %cmp2
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: udiv i32 %m, %n
%div = udiv i32 %m, %n
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test5
define void @test5(i32 %n) {
%trunc = and i32 %n, 65535
; CHECK: udiv i16
%div = udiv i32 %trunc, 42
ret void
}
; CHECK-LABEL: @test6
define void @test6(i32 %n) {
entry:
%cmp = icmp ule i32 %n, 255
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: udiv i8
%div = sdiv i32 %n, 100
br label %exit
exit:
ret void
}

View File

@ -0,0 +1,101 @@
; RUN: opt < %s -correlated-propagation -S | FileCheck %s
; CHECK-LABEL: @test_nop
define void @test_nop(i32 %n) {
; CHECK udiv i32 %n, 100
%div = udiv i32 %n, 100
ret void
}
; CHECK-LABEL: @test1(
define void @test1(i32 %n) {
entry:
%cmp = icmp ule i32 %n, 65535
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: urem i16
%div = urem i32 %n, 100
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test2(
define void @test2(i32 %n) {
entry:
%cmp = icmp ule i32 %n, 65536
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: urem i32 %n, 100
%div = urem i32 %n, 100
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test3(
define void @test3(i32 %m, i32 %n) {
entry:
%cmp1 = icmp ult i32 %m, 65535
%cmp2 = icmp ult i32 %n, 65535
%cmp = and i1 %cmp1, %cmp2
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: urem i16
%div = urem i32 %m, %n
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test4(
define void @test4(i32 %m, i32 %n) {
entry:
%cmp1 = icmp ult i32 %m, 65535
%cmp2 = icmp ule i32 %n, 65536
%cmp = and i1 %cmp1, %cmp2
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: urem i32 %m, %n
%div = urem i32 %m, %n
br label %exit
exit:
ret void
}
; CHECK-LABEL: @test5
define void @test5(i32 %n) {
%trunc = and i32 %n, 63
; CHECK: urem i8
%div = urem i32 %trunc, 42
ret void
}
; CHECK-LABEL: @test6
define void @test6(i32 %n) {
entry:
%cmp = icmp ule i32 %n, 255
br i1 %cmp, label %bb, label %exit
bb:
; CHECK: urem i8
%div = srem i32 %n, 100
br label %exit
exit:
ret void
}
; CHECK-LABEL: @non_power_of_2
define void @non_power_of_2(i24 %n) {
%div = urem i24 %n, 42
ret void
}