[SimplifyLibCalls] Correctly set the is_zero_undef flag for llvm.cttz

If <src> is non-zero we can safely set the flag to true, and this
results in less code generated for, e.g. ffs(x) + 1 on FreeBSD.
Thanks to majnemer for suggesting the fix and reviewing.

Code generated before the patch was applied:


 0:   0f bc c7                bsf    %edi,%eax
 3:   b9 20 00 00 00          mov    $0x20,%ecx
 8:   0f 45 c8                cmovne %eax,%ecx
 b:   83 c1 02                add    $0x2,%ecx
 e:   b8 01 00 00 00          mov    $0x1,%eax
13:   85 ff                   test   %edi,%edi
15:   0f 45 c1                cmovne %ecx,%eax
18:   c3                      retq

Code generated after the patch was applied:

 0:   0f bc cf                bsf    %edi,%ecx
 3:   83 c1 02                add    $0x2,%ecx
 6:   85 ff                   test   %edi,%edi
 8:   b8 01 00 00 00          mov    $0x1,%eax
 d:   0f 45 c1                cmovne %ecx,%eax
10:   c3                      retq

It seems we can still use cmove and save another 'test' instruction, but
that can be tackled separately.

Differential Revision:  http://reviews.llvm.org/D11989	

llvm-svn: 244947
This commit is contained in:
Davide Italiano 2015-08-13 20:34:26 +00:00
parent ff1ca4af4b
commit a195386ca1
2 changed files with 4 additions and 4 deletions

View File

@ -1436,7 +1436,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Type *ArgType = Op->getType();
Value *F =
Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz");
Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);

View File

@ -102,7 +102,7 @@ define i32 @test_simplify12() {
define i32 @test_simplify13(i32 %x) {
; CHECK-LABEL: @test_simplify13(
%ret = call i32 @ffs(i32 %x)
; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
@ -113,7 +113,7 @@ define i32 @test_simplify13(i32 %x) {
define i32 @test_simplify14(i32 %x) {
; CHECK-LINUX-LABEL: @test_simplify14(
%ret = call i32 @ffsl(i32 %x)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
@ -124,7 +124,7 @@ define i32 @test_simplify14(i32 %x) {
define i32 @test_simplify15(i64 %x) {
; CHECK-LINUX-LABEL: @test_simplify15(
%ret = call i32 @ffsll(i64 %x)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0