X86: Resolve a long standing FIXME and properly isel pextr[bw].

Generalize the AArch64 .td nodes for AssertZext and AssertSext. Use
them to match the relevant pextr store instructions.

The test widen_load-2.ll requires a slight change because with the
stores gone, the remaining instructions are scheduled in a different
order.

Add test cases for SSE4 and AVX variants.

Resolves rdar://13414672.

Patch by Adam Nemet <anemet@apple.com>.

llvm-svn: 200957
This commit is contained in:
Jim Grosbach 2014-02-07 00:16:33 +00:00
parent cdf2c8be58
commit e9008de652
6 changed files with 33 additions and 19 deletions

View File

@ -492,6 +492,12 @@ def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN",
// Do not use cvt directly. Use cvt forms below
def cvt : SDNode<"ISD::CONVERT_RNDSAT", SDTConvertOp>;
def SDT_assertext : SDTypeProfile<1, 1,
[SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
//===----------------------------------------------------------------------===//
// Selection DAG Condition Codes

View File

@ -64,11 +64,6 @@ def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
def SDT_assertext : SDTypeProfile<1, 1,
[SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
//===----------------------------------------------------------------------===//
// Addressing-mode instantiations
//===----------------------------------------------------------------------===//

View File

@ -494,11 +494,6 @@ is memory.
//===---------------------------------------------------------------------===//
SSE4 extract-to-mem ops aren't being pattern matched because of the AssertZext
sitting between the truncate and the extract.
//===---------------------------------------------------------------------===//
INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert
any number of 0.0 simultaneously. Currently we only use it for simple
insertions.

View File

@ -6210,10 +6210,8 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
[(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1),
imm:$src2)))), addr:$dst)]>;
}
let Predicates = [HasAVX] in
@ -6236,10 +6234,8 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
[(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1),
imm:$src2)))), addr:$dst)]>;
}
let Predicates = [HasAVX] in

View File

@ -0,0 +1,22 @@
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=SSE41
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) {
; AVX: vpextrb
; SSE41: pextrb
; AVX-NOT: movb
; SSE41-NOT: movb
%vecext = extractelement <16 x i8> %foo, i32 15
store i8 %vecext, i8* %dst, align 1
ret void
}
define void @pextrw(i16* nocapture %dst, <8 x i16> %foo) {
; AVX: vpextrw
; SSE41: pextrw
; AVX-NOT: movw
; SSE41-NOT: movw
%vecext = extractelement <8 x i16> %foo, i32 15
store i16 %vecext, i16* %dst, align 1
ret void
}

View File

@ -149,9 +149,9 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; CHECK: movdqa
; CHECK: paddb
; CHECK: paddb
; CHECK: movq
; CHECK: pextrb
; CHECK: pextrw
; CHECK: movq
; CHECK: ret
%a = load %i8vec31* %ap, align 16
%b = load %i8vec31* %bp, align 16