During SelectionDAG building explicitly set a node to constant zero when the

value is zero.
This allows optimizations to kick in more easily.
Fix some test cases so that they remain meaningful (i.e., are not completely
eliminated as dead code) when optimizations apply.

<rdar://problem/14096009> superfluous multiply by high part of zero-extended
value.

llvm-svn: 184222
This commit is contained in:
Quentin Colombet 2013-06-18 20:14:39 +00:00
parent 54c83695f7
commit b51a68681a
5 changed files with 49 additions and 5 deletions

View File

@ -718,6 +718,14 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
if (NumZeroBits == RegSize) {
// The current value is a zero.
// Explicitly express that as it would be easier for
// optimizations to kick in.
Parts[i] = DAG.getConstant(0, RegisterVT);
continue;
}
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
bool isSExt = true;

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=arm | grep mvn | count 8
; RUN: llc < %s -march=arm | grep mvn | count 9
define i32 @f1() {
entry:

View File

@ -7,8 +7,10 @@ entry:
cond_next127: ; preds = %cond_next391, %entry
%v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ] ; <i32> [#uses=1]
%tmp149 = mul i32 0, %v.1 ; <i32> [#uses=0]
%tmp254 = and i32 0, 15 ; <i32> [#uses=1]
%tmp256 = and i32 0, 15 ; <i32> [#uses=2]
%tmpss = load i32* %ss, align 4 ; <i32> [#uses=1]
%tmpbp = load i32* %bp, align 4 ; <i32> [#uses=2]
%tmp254 = and i32 %tmpss, 15 ; <i32> [#uses=1]
%tmp256 = and i32 %tmpbp, 15 ; <i32> [#uses=2]
br label %cond_next391
cond_next391: ; preds = %cond_next127

View File

@ -19,7 +19,7 @@ bb917: ; preds = %entry
ret i32 0
bb951: ; preds = %bb986, %entry
%tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3]
%tmp955 = sdiv i32 %offset, 2 ; <i32> [#uses=3]
%tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1]
br i1 %cond, label %bb986, label %bb967

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64
; RUN: llc < %s -march=x86-64 | FileCheck %s
; PR1198
define i64 @foo(i64 %x, i64 %y) {
@ -10,3 +10,37 @@ define i64 @foo(i64 %x, i64 %y) {
%tmp4 = trunc i128 %tmp3 to i64
ret i64 %tmp4
}
; <rdar://problem/14096009> superfluous multiply by high part of
; zero-extended value.
; CHECK: @mul1
; CHECK-NOT: imulq
; CHECK: mulq
; CHECK-NOT: imulq
; Loop over %n elements of %x: each element is zero-extended to i128,
; multiplied by the zero-extended %y, and added to a running 64-bit carry.
; The low 64 bits of the sum are stored to the matching slot of %z and the
; high 64 bits become the carry for the next iteration. Always returns 0.
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) {
entry:
; %y is widened once, before the loop; the zext leaves its upper 64 bits zero.
%conv = zext i64 %y to i128
; Skip the loop entirely when %n is 0.
%cmp11 = icmp eq i64 %n, 0
br i1 %cmp11, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
; Carry starts at 0 and is fed back from the high half of the previous sum.
%carry.013 = phi i64 [ %conv6, %for.body ], [ 0, %entry ]
; Induction variable, counting up from 0.
%i.012 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i64* %x, i64 %i.012
%0 = load i64* %arrayidx, align 8
; Both multiplicands are zero-extended from i64, so their high halves are zero.
%conv2 = zext i64 %0 to i128
%mul = mul i128 %conv2, %conv
%conv3 = zext i64 %carry.013 to i128
%add = add i128 %mul, %conv3
; Low 64 bits of the 128-bit sum go to %z at the current index.
%conv4 = trunc i128 %add to i64
%arrayidx5 = getelementptr inbounds i64* %z, i64 %i.012
store i64 %conv4, i64* %arrayidx5, align 8
; High 64 bits of the sum become the next iteration's carry.
%shr = lshr i128 %add, 64
%conv6 = trunc i128 %shr to i64
%inc = add i64 %i.012, 1
; Exit once the incremented index equals %n.
%exitcond = icmp eq i64 %inc, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret i64 0
}