This patch closes PR28513: an optimization of multiplication by different constants.

It's implemented on DAG combiner level.

llvm-svn: 304209
This commit is contained in:
Andrew V. Tischenko 2017-05-30 13:00:44 +00:00
parent 88d73626cd
commit 8b04826663
4 changed files with 3061 additions and 361 deletions

View File

@ -80,6 +80,12 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."),
cl::Hidden);
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
"SHIFT, LEA, etc."),
cl::Hidden);
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
@ -30928,6 +30934,75 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
}
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
EVT VT, SDLoc DL) {
auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(Mult, DL, VT));
Result = DAG.getNode(ISD::SHL, DL, VT, Result,
DAG.getConstant(Shift, DL, MVT::i8));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
Result);
return Result;
};
auto combineMulMulAddOrSub = [&](bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(9, DL, VT));
Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
Result);
return Result;
};
switch (MulAmt) {
default:
break;
case 11:
// mul x, 11 => add ((shl (mul x, 5), 1), x)
return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
case 21:
// mul x, 21 => add ((shl (mul x, 5), 2), x)
return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
case 22:
// mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
case 19:
// mul x, 19 => sub ((shl (mul x, 5), 2), x)
return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
case 13:
// mul x, 13 => add ((shl (mul x, 3), 2), x)
return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
case 23:
// mul x, 13 => sub ((shl (mul x, 3), 3), x)
return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
case 14:
// mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
case 26:
// mul x, 26 => sub ((mul (mul x, 9), 3), x)
return combineMulMulAddOrSub(/*isAdd*/ false);
case 28:
// mul x, 28 => add ((mul (mul x, 9), 3), x)
return combineMulMulAddOrSub(/*isAdd*/ true);
case 29:
// mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulMulAddOrSub(/*isAdd*/ true));
case 30:
// mul x, 30 => sub (sub ((shl x, 5), x), x)
return DAG.getNode(
ISD::SUB, DL, VT, N->getOperand(0),
DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(5, DL, MVT::i8))));
}
return SDValue();
}
/// Optimize a single multiply with constant into two operations in order to
/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@ -30937,6 +31012,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
if (!MulConstantOptimization)
return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
@ -30992,7 +31069,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
}
} else if (!Subtarget.slowLEA())
NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
if (!NewMul) {
assert(MulAmt != 0 &&

View File

@ -188,13 +188,16 @@ define i16 @test_mul_by_11(i16 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $11, %eax, %eax
; X86-NEXT: leal (%eax,%eax,4), %ecx
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_11:
; X64: # BB#0:
; X64-NEXT: imull $11, %edi, %eax
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,4), %eax
; X64-NEXT: leal (%rdi,%rax,2), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 11
@ -225,13 +228,16 @@ define i16 @test_mul_by_13(i16 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $13, %eax, %eax
; X86-NEXT: leal (%eax,%eax,2), %ecx
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_13:
; X64: # BB#0:
; X64-NEXT: imull $13, %edi, %eax
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,2), %eax
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 13
@ -241,14 +247,19 @@ define i16 @test_mul_by_13(i16 %x) {
define i16 @test_mul_by_14(i16 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $14, %eax, %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal (%ecx,%ecx,2), %eax
; X86-NEXT: leal (%ecx,%eax,4), %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_14:
; X64: # BB#0:
; X64-NEXT: imull $14, %edi, %eax
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,2), %eax
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 14
@ -338,14 +349,19 @@ define i16 @test_mul_by_19(i16 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $19, %eax, %eax
; X86-NEXT: leal (%eax,%eax,4), %ecx
; X86-NEXT: shll $2, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_19:
; X64: # BB#0:
; X64-NEXT: imull $19, %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,4), %eax
; X64-NEXT: shll $2, %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 19
ret i16 %mul
@ -375,13 +391,16 @@ define i16 @test_mul_by_21(i16 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $21, %eax, %eax
; X86-NEXT: leal (%eax,%eax,4), %ecx
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_21:
; X64: # BB#0:
; X64-NEXT: imull $21, %edi, %eax
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,4), %eax
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 21
@ -391,14 +410,19 @@ define i16 @test_mul_by_21(i16 %x) {
define i16 @test_mul_by_22(i16 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $22, %eax, %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal (%ecx,%ecx,4), %eax
; X86-NEXT: leal (%ecx,%eax,4), %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_22:
; X64: # BB#0:
; X64-NEXT: imull $22, %edi, %eax
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,4), %eax
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 22
@ -409,14 +433,19 @@ define i16 @test_mul_by_23(i16 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $23, %eax, %eax
; X86-NEXT: leal (%eax,%eax,2), %ecx
; X86-NEXT: shll $3, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_23:
; X64: # BB#0:
; X64-NEXT: imull $23, %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,2), %eax
; X64-NEXT: shll $3, %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 23
ret i16 %mul
@ -466,14 +495,19 @@ define i16 @test_mul_by_26(i16 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $26, %eax, %eax
; X86-NEXT: leal (%eax,%eax,8), %ecx
; X86-NEXT: leal (%ecx,%ecx,2), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_26:
; X64: # BB#0:
; X64-NEXT: imull $26, %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,8), %eax
; X64-NEXT: leal (%rax,%rax,2), %eax
; X64-NEXT: subl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 26
ret i16 %mul
@ -502,14 +536,19 @@ define i16 @test_mul_by_27(i16 %x) {
define i16 @test_mul_by_28(i16 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $28, %eax, %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal (%ecx,%ecx,8), %eax
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_28:
; X64: # BB#0:
; X64-NEXT: imull $28, %edi, %eax
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,8), %eax
; X64-NEXT: leal (%rax,%rax,2), %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 28
@ -519,14 +558,21 @@ define i16 @test_mul_by_28(i16 %x) {
define i16 @test_mul_by_29(i16 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $29, %eax, %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal (%ecx,%ecx,8), %eax
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_29:
; X64: # BB#0:
; X64-NEXT: imull $29, %edi, %eax
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,8), %eax
; X64-NEXT: leal (%rax,%rax,2), %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 29
@ -537,14 +583,22 @@ define i16 @test_mul_by_30(i16 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull $30, %eax, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll $5, %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: subl %edx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_30:
; X64: # BB#0:
; X64-NEXT: imull $30, %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $5, %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: subl %eax, %ecx
; X64-NEXT: subl %ecx, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: retq
%mul = mul nsw i16 %x, 30
ret i16 %mul
@ -587,3 +641,30 @@ define i16 @test_mul_by_32(i16 %x) {
%mul = mul nsw i16 %x, 32
ret i16 %mul
}
; (x*9+42)*(x*5+2)
define i16 @test_mul_spec(i16 %x) nounwind {
; X86-LABEL: test_mul_spec:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal 42(%eax,%eax,8), %ecx
; X86-NEXT: leal 2(%eax,%eax,4), %eax
; X86-NEXT: imull %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_spec:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 42(%rdi,%rdi,8), %ecx
; X64-NEXT: leal 2(%rdi,%rdi,4), %eax
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 9
%add = add nsw i16 %mul, 42
%mul2 = mul nsw i16 %x, 5
%add2 = add nsw i16 %mul2, 2
%mul3 = mul nsw i16 %add, %add2
ret i16 %mul3
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff