Finish implementing a readme entry: when inserting an i64 variable

into a vector of zeros or undef, and when the top part is obviously
zero, we can just use movd + shuffle.  This allows us to compile
vec_set-B.ll into:

_test3:
	movl	$1234567, %eax
	andl	4(%esp), %eax
	movd	%eax, %xmm0
	ret

instead of:

_test3:
	subl	$28, %esp
	movl	$1234567, %eax
	andl	32(%esp), %eax
	movl	%eax, (%esp)
	movl	$0, 4(%esp)
	movq	(%esp), %xmm0
	addl	$28, %esp
	ret

llvm-svn: 48090
This commit is contained in:
Chris Lattner 2008-03-09 05:42:06 +00:00
parent 6af064641f
commit b6387c8a74
3 changed files with 29 additions and 43 deletions

View File

@ -781,41 +781,3 @@ LLVM should be able to generate the same thing as gcc. This looks like it is
just a matter of matching (scalar_to_vector (load x)) to movd.
//===---------------------------------------------------------------------===//
These two functions should compile to identical code on x86-32:
define <2 x i64> @test2(i64 %arg) {
entry:
%A = and i64 %arg, 1234567
%B = insertelement <2 x i64> undef, i64 %A, i32 0
ret <2 x i64> %B
}
define <2 x i64> @test2(i64 %arg) {
entry:
%A = and i64 %arg, 1234567
%B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
ret <2 x i64> %B
}
The later compiles to:
_test2:
movl $1234567, %eax
andl 4(%esp), %eax
movd %eax, %xmm0
ret
the former compiles to:
_test2:
subl $28, %esp
movl $1234567, %eax
andl 32(%esp), %eax
movl %eax, (%esp)
movl $0, 4(%esp)
movaps (%esp), %xmm0
addl $28, %esp
ret
//===---------------------------------------------------------------------===//

View File

@ -3063,11 +3063,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::UNDEF, VT);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// Special case for single non-zero element.
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
@ -3141,6 +3137,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
}
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)

View File

@ -0,0 +1,24 @@
; RUN: llvm-as < %s | llc -march=x86 | not grep movaps
; RUN: llvm-as < %s | llc -march=x86 | grep esp | count 2
; These should both generate something like this:
;_test3:
; movl $1234567, %eax
; andl 4(%esp), %eax
; movd %eax, %xmm0
; ret
define <2 x i64> @test3(i64 %arg) {
entry:
%A = and i64 %arg, 1234567
%B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
ret <2 x i64> %B
}
define <2 x i64> @test2(i64 %arg) {
entry:
%A = and i64 %arg, 1234567
%B = insertelement <2 x i64> undef, i64 %A, i32 0
ret <2 x i64> %B
}