From 32ad5e9c083e1aed26534011408aa1a1b8371921 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 11 Jun 2009 23:07:04 +0000 Subject: [PATCH] Misc x86 README updates: remove a couple of already-fixed issues, add a few suggestions from looking at some assembly code. llvm-svn: 73210 --- llvm/lib/Target/X86/README.txt | 114 ++++++++++++++++----------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/X86/README.txt b/llvm/lib/Target/X86/README.txt index 710bd0357433..3796aac57cb5 100644 --- a/llvm/lib/Target/X86/README.txt +++ b/llvm/lib/Target/X86/README.txt @@ -482,35 +482,6 @@ _usesbb: //===---------------------------------------------------------------------===// -Currently we don't have elimination of redundant stack manipulations. Consider -the code: - -int %main() { -entry: - call fastcc void %test1( ) - call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) ) - ret int 0 -} - -declare fastcc void %test1() - -declare fastcc void %test2(sbyte*) - - -This currently compiles to: - - subl $16, %esp - call _test5 - addl $12, %esp - subl $16, %esp - movl $_test5, (%esp) - call _test6 - addl $12, %esp - -The add\sub pair is really unneeded here. - -//===---------------------------------------------------------------------===// - Consider the expansion of: define i32 @test3(i32 %X) { @@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0. //===---------------------------------------------------------------------===// -We aren't matching RMW instructions aggressively -enough. Here's a reduced testcase (more in PR1160): - -define void @test(i32* %huge_ptr, i32* %target_ptr) { - %A = load i32* %huge_ptr ; [#uses=1] - %B = load i32* %target_ptr ; [#uses=1] - %C = or i32 %A, %B ; [#uses=1] - store i32 %C, i32* %target_ptr - ret void -} - -$ llvm-as < t.ll | llc -march=x86-64 - -_test: - movl (%rdi), %eax - orl (%rsi), %eax - movl %eax, (%rsi) - ret - -That should be something like: - -_test: - movl (%rdi), %eax - orl %eax, (%rsi) - ret - -//===---------------------------------------------------------------------===// - The following code: bb114.preheader: ; preds = %cond_next94 @@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" //===---------------------------------------------------------------------===// + +Testcase: +int a(int x) { return (x & 127) > 31; } + +Current output: + movl 4(%esp), %eax + andl $127, %eax + cmpl $31, %eax + seta %al + movzbl %al, %eax + ret + +Ideal output: + xorl %eax, %eax + testl $96, 4(%esp) + setne %al + ret + +We could do this transformation in instcombine, but it's only clearly +beneficial on platforms with a test instruction. + +//===---------------------------------------------------------------------===// +Testcase: +int x(int a) { return (a&0xf0)>>4; } + +Current output: + movl 4(%esp), %eax + shrl $4, %eax + andl $15, %eax + ret + +Ideal output: + movzbl 4(%esp), %eax + shrl $4, %eax + ret + +//===---------------------------------------------------------------------===// + +Testcase: +int x(int a) { return (a & 0x80) ? 0x100 : 0; } + +Current output: + testl $128, 4(%esp) + setne %al + movzbl %al, %eax + shll $8, %eax + ret + +Ideal output: + movl 4(%esp), %eax + addl %eax, %eax + andl $256, %eax + ret + +We generally want to fold shifted tests of a single bit into a shift+and on x86. + +//===---------------------------------------------------------------------===//