[InstCombine] fix operand-complexity-based canonicalization (PR28296)

The code comments didn't match the code logic, and we didn't actually distinguish the fake unary operators (not/neg/fneg) 
from arguments. Adding another level to the weighting scheme provides more structure and can help 
simplify the pattern matching in InstCombine and other places.

I fixed regressions that would have shown up from this change in:
rL290067
rL290127

That doesn't mean there are no pattern-matching logic holes left, however; some combines may simply be missing regression tests.

Should fix:
https://llvm.org/bugs/show_bug.cgi?id=28296

Differential Revision: https://reviews.llvm.org/D27933

llvm-svn: 294049
This commit is contained in:
Sanjay Patel 2017-02-03 21:43:34 +00:00
parent 2532431332
commit 73fc8ddb06
15 changed files with 42 additions and 35 deletions

View File

@ -40,21 +40,29 @@ class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
/// \brief Assign a complexity or rank value to LLVM Values.
/// Assign a complexity or rank value to LLVM Values. This is used to reduce
/// the amount of pattern matching needed for compares and commutative
/// instructions. For example, if we have:
/// icmp ugt X, Constant
/// or
/// xor (add X, Constant), cast Z
///
/// We do not have to consider the commuted variants of these patterns because
/// canonicalization based on complexity guarantees the above ordering.
///
/// This routine maps IR values to various complexity ranks:
/// 0 -> undef
/// 1 -> Constants
/// 2 -> Other non-instructions
/// 3 -> Arguments
/// 3 -> Unary operations
/// 4 -> Other instructions
/// 4 -> Cast and (f)neg/not instructions
/// 5 -> Other instructions
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
BinaryOperator::isNot(V))
return 3;
return 4;
if (isa<CastInst>(V) || BinaryOperator::isNeg(V) ||
BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V))
return 4;
return 5;
}
if (isa<Argument>(V))
return 3;

View File

@ -100,7 +100,7 @@ define i32 @test9(i32 %A) {
define i1 @test10(i8 %A, i8 %b) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[B:%.*]] = sub i8 0, %b
; CHECK-NEXT: [[C:%.*]] = icmp ne i8 %A, [[B]]
; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[B]], %A
; CHECK-NEXT: ret i1 [[C]]
;
%B = add i8 %A, %b
@ -112,7 +112,7 @@ define i1 @test10(i8 %A, i8 %b) {
define <2 x i1> @test10vec(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: @test10vec(
; CHECK-NEXT: [[C:%.*]] = sub <2 x i8> zeroinitializer, %b
; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> %a, [[C]]
; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> [[C]], %a
; CHECK-NEXT: ret <2 x i1> [[D]]
;
%c = add <2 x i8> %a, %b

View File

@ -176,7 +176,7 @@ define i8 @test16(i8 %A) {
define i8 @test17(i8 %X, i8 %Y) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: [[Y_NOT:%.*]] = xor i8 %Y, -1
; CHECK-NEXT: [[D:%.*]] = or i8 %X, [[Y_NOT]]
; CHECK-NEXT: [[D:%.*]] = or i8 [[Y_NOT]], %X
; CHECK-NEXT: ret i8 [[D]]
;
%B = xor i8 %X, -1

View File

@ -50,7 +50,7 @@ define i19 @test5(i19 %A, i19 %Bok, i19 %Cok) {
define i57 @test6(i57 %A, i57 %B) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[B_NOT:%.*]] = xor i57 %B, -1
; CHECK-NEXT: [[D:%.*]] = and i57 %A, [[B_NOT]]
; CHECK-NEXT: [[D:%.*]] = and i57 [[B_NOT]], %A
; CHECK-NEXT: ret i57 [[D]]
;
%C = and i57 %A, %B

View File

@ -144,7 +144,7 @@ chk2:
ret i8* %n
; CHECK-LABEL: compare_ret_escape
; CHECK: %cmp = icmp eq i8* %n, %c
; CHECK: %cmp2 = icmp eq i32* %bc, %lgp
; CHECK: %cmp2 = icmp eq i32* %lgp, %bc
}
; The malloc call for %m cannot be elided since it is used in the call to function f.

View File

@ -918,7 +918,7 @@ define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %i to i16
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 %j to i16
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], [[GEP1_IDX]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]]
; CHECK-NEXT: ret i1 [[TMP3]]
;
%bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
@ -949,7 +949,7 @@ define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) {
; CHECK-LABEL: @test60_addrspacecast_smaller(
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 %i, 2
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %j to i16
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[GEP1_IDX]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]]
; CHECK-NEXT: ret i1 [[TMP2]]
;
%bit = addrspacecast i8* %foo to i32 addrspace(1)*
@ -981,7 +981,7 @@ define i1 @test61(i8* %foo, i64 %i, i64 %j) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i64 %i
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* %foo, i64 %j
; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8*
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[CAST1]], [[GEP2]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%bit = bitcast i8* %foo to i32*
@ -999,7 +999,7 @@ define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 %i
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* %foo, i16 %j
; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)*
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 addrspace(1)* [[CAST1]], [[GEP2]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
@ -2807,7 +2807,7 @@ define i1 @cmp_sle_rhs_inc(float %x, i32 %y) {
; CHECK-LABEL: @cmp_sle_rhs_inc(
; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
; CHECK-NEXT: [[INC:%.*]] = add
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV]], [[INC]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[INC]], [[CONV]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%conv = fptosi float %x to i32
@ -2820,7 +2820,7 @@ define i1 @cmp_ule_rhs_inc(float %x, i32 %y) {
; CHECK-LABEL: @cmp_ule_rhs_inc(
; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
; CHECK-NEXT: [[INC:%.*]] = add
; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[CONV]], [[INC]]
; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC]], [[CONV]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%conv = fptosi float %x to i32
@ -2833,7 +2833,7 @@ define i1 @cmp_slt_rhs_dec(float %x, i32 %y) {
; CHECK-LABEL: @cmp_slt_rhs_dec(
; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
; CHECK-NEXT: [[DEC:%.*]] = {{add|sub}}
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[DEC]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[CONV]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%conv = fptosi float %x to i32
@ -2846,7 +2846,7 @@ define i1 @cmp_ult_rhs_dec(float %x, i32 %y) {
; CHECK-LABEL: @cmp_ult_rhs_dec(
; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
; CHECK-NEXT: [[DEC:%.*]] = {{add|sub}}
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[DEC]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[DEC]], [[CONV]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%conv = fptosi float %x to i32

View File

@ -212,8 +212,7 @@ endif:
; FIXME:
; Narrowing should work with an 'xor' and is not limited to bool types.
; FIXME:
; We should either canonicalize based on complexity or enhance the pattern matching to catch this commuted variant.
; Test that commuting the xor operands does not inhibit optimization.
define i32 @shrinkLogicAndPhi2(i8 %x, i1 %cond) {
; CHECK-LABEL: @shrinkLogicAndPhi2(
@ -224,7 +223,7 @@ define i32 @shrinkLogicAndPhi2(i8 %x, i1 %cond) {
; CHECK: endif:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[ZEXT]], [[PHI]]
; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]]
; CHECK-NEXT: ret i32 [[LOGIC]]
;
entry:

View File

@ -490,7 +490,7 @@ define i32 @orsext_to_sel_multi_use(i32 %x, i1 %y) {
; CHECK-LABEL: @orsext_to_sel_multi_use(
; CHECK-NEXT: [[SEXT:%.*]] = sext i1 %y to i32
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEXT]], %x
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEXT]], [[OR]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[OR]], [[SEXT]]
; CHECK-NEXT: ret i32 [[ADD]]
;
%sext = sext i1 %y to i32

View File

@ -190,7 +190,7 @@ define <2 x i1> @test62vec(<2 x i1> %A, <2 x i1> %B) {
define i1 @test63(i1 %A, i1 %B) {
; CHECK-LABEL: @test63(
; CHECK-NEXT: [[NOT:%.*]] = xor i1 %A, true
; CHECK-NEXT: [[C:%.*]] = or i1 %B, [[NOT]]
; CHECK-NEXT: [[C:%.*]] = or i1 [[NOT]], %B
; CHECK-NEXT: ret i1 [[C]]
;
%not = xor i1 %A, true
@ -201,7 +201,7 @@ define i1 @test63(i1 %A, i1 %B) {
define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) {
; CHECK-LABEL: @test63vec(
; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> %A, <i1 true, i1 true>
; CHECK-NEXT: [[C:%.*]] = or <2 x i1> %B, [[NOT]]
; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[NOT]], %B
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%not = xor <2 x i1> %A, <i1 true, i1 true>

View File

@ -54,7 +54,7 @@ define i32 @test5(i32 %A, i32 %B, i32 %C) {
define i32 @test6(i32 %A, i32 %B) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %B, -1
; CHECK-NEXT: [[D:%.*]] = and i32 %A, [[B_NOT]]
; CHECK-NEXT: [[D:%.*]] = and i32 [[B_NOT]], %A
; CHECK-NEXT: ret i32 [[D]]
;
%C = and i32 %A, %B
@ -617,7 +617,7 @@ define i32 @test45(i32 %x, i32 %y) {
define i32 @test46(i32 %x, i32 %y) {
; CHECK-LABEL: @test46(
; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 %x, -1
; CHECK-NEXT: [[SUB:%.*]] = and i32 %y, [[X_NOT]]
; CHECK-NEXT: [[SUB:%.*]] = and i32 [[X_NOT]], %y
; CHECK-NEXT: ret i32 [[SUB]]
;
%or = or i32 %x, %y

View File

@ -67,7 +67,7 @@ define i64 @test3(float %f, double %d) {
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
; CHECK-NEXT: ret i64 [[TMP15]]
;
%v00 = insertelement <4 x float> undef, float %f, i32 0

View File

@ -6,7 +6,7 @@ define <4 x i32> @psignd_3(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, %a
; CHECK-NEXT: [[B_LOBIT:%.*]] = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[B_LOBIT]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: [[T2:%.*]] = and <4 x i32> %a, [[T1]]
; CHECK-NEXT: [[T2:%.*]] = and <4 x i32> [[T1]], %a
; CHECK-NEXT: [[T3:%.*]] = and <4 x i32> [[B_LOBIT]], [[SUB]]
; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]]
; CHECK-NEXT: ret <4 x i32> [[COND]]

View File

@ -781,7 +781,7 @@ define i64 @test(float %f, double %d) {
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
; CHECK-NEXT: ret i64 [[TMP15]]
;
%v00 = insertelement <4 x float> undef, float %f, i32 0
@ -861,7 +861,7 @@ define i64 @test2(float %f, double %d) {
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
; CHECK-NEXT: ret i64 [[TMP15]]
;
%v00 = insertelement <4 x float> undef, float %f, i32 0

View File

@ -321,7 +321,7 @@ define i32 @test25(i32 %g, i32 %h) {
define i32 @test26(i32 %a, i32 %b) {
; CHECK-LABEL: @test26(
; CHECK-NEXT: [[T4:%.*]] = and i32 %a, %b
; CHECK-NEXT: [[T4:%.*]] = and i32 %b, %a
; CHECK-NEXT: ret i32 [[T4]]
;
%b2 = xor i32 %b, -1

View File

@ -110,7 +110,7 @@ define i32 @test6(i32 %x) {
define i32 @test7(i32 %a, i32 %b) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %b, -1
; CHECK-NEXT: [[XOR:%.*]] = or i32 %a, [[B_NOT]]
; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], %a
; CHECK-NEXT: ret i32 [[XOR]]
;
%or = or i32 %a, %b
@ -123,7 +123,7 @@ define i32 @test7(i32 %a, i32 %b) {
define i32 @test8(i32 %a, i32 %b) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 %b, -1
; CHECK-NEXT: [[XOR:%.*]] = or i32 %a, [[B_NOT]]
; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], %a
; CHECK-NEXT: ret i32 [[XOR]]
;
%neg = xor i32 %a, -1