[X86][SSE] Enable min/max partial reduction

As mentioned on D65047 / rL366933, the plan is to enable partial reduction handling wherever possible.

llvm-svn: 368016
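
A "partial" reduction here is one that only reduces a leading subvector of a wider input, e.g. the low 8 lanes of a <16 x i16> vector, which is exactly what the test_reduce_v16i16_v8i16-style tests below exercise. A minimal scalar sketch of the pattern (illustrative only; the function name is hypothetical):

    // Partial smax reduction: only the low 8 of 16 lanes participate;
    // the upper 8 lanes are don't-care, mirroring test_reduce_v16i16_v8i16.
    #include <algorithm>
    #include <array>
    #include <cstdint>

    int16_t smax_partial_v16i16_v8i16(const std::array<int16_t, 16> &Lanes) {
      int16_t Result = Lanes[0];
      for (unsigned I = 1; I != 8; ++I) // ignore lanes 8..15
        Result = std::max(Result, Lanes[I]);
      return Result;
    }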
Simon Pilgrim 2019-08-06 11:00:34 +00:00
parent 23cd0da9e9
commit c6735aecfa
5 changed files with 219 additions and 231 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -35384,7 +35384,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
   // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
   ISD::NodeType BinOp;
   SDValue Src = DAG.matchBinOpReduction(
-      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
+      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true);
   if (!Src)
     return SDValue();
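
The new trailing `true` opts this combine into the partial-reduction matching that rL366933 added to matchBinOpReduction. For reference, the helper's declaration is roughly the following sketch (the AllowPartials parameter name is assumed from that patch, not shown in this diff):

    // Assumed shape of the SelectionDAG helper called above; when the final
    // flag is true, the matcher also accepts reduction trees that only cover
    // a leading subvector of the source vector.
    SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
                                ArrayRef<ISD::NodeType> CandidateBinOps,
                                bool AllowPartials = false);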

llvm/test/CodeGen/X86/horizontal-reduce-smax.ll

@@ -1859,13 +1859,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI12_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1894,13 +1891,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -1942,13 +1936,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI13_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1977,13 +1968,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2047,15 +2035,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI14_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $127, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2106,15 +2091,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $127, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2181,15 +2163,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI15_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $127, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2240,15 +2219,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $127, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
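
Why the smax diffs above collapse to vpxor + vphminposuw: vphminposuw computes an unsigned u16 minimum, and XOR with 0x7FFF is an order-reversing bijection between signed and unsigned i16, so a signed max becomes an unsigned min over biased lanes. A standalone sketch of the identity (illustrative, not from the commit):

    // smax(A, B) == umin(A ^ 0x7FFF, B ^ 0x7FFF) ^ 0x7FFF for 16-bit values.
    // The lane XOR matches the vpxor LCPI*_0 constant-pool loads above, and
    // the scalar fixup matches the trailing `xorl $32767, %eax`.
    #include <cstdint>

    int16_t smax_via_umin(int16_t A, int16_t B) {
      uint16_t UA = uint16_t(A) ^ 0x7FFF; // signed order -> reversed unsigned order
      uint16_t UB = uint16_t(B) ^ 0x7FFF;
      uint16_t Min = UA < UB ? UA : UB;   // the vphminposuw step, per pair of lanes
      return int16_t(Min ^ 0x7FFF);       // undo the bias
    }

The smin file below plays the same trick with the opposite fixup (xorl $32768, i.e. an order-preserving bias by 0x8000), and the i8 cases first fold byte pairs with vpsrlw $8 + vpminub so that vphminposuw can run on u16 lanes.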

llvm/test/CodeGen/X86/horizontal-reduce-smin.ll

@@ -1863,13 +1863,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI12_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1898,13 +1895,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -1946,13 +1940,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI13_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1981,13 +1972,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2051,15 +2039,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI14_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $-128, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2110,15 +2095,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $-128, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2185,15 +2167,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI15_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $-128, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2244,15 +2223,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $-128, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq

llvm/test/CodeGen/X86/horizontal-reduce-umax.ll

@@ -2061,13 +2061,11 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: notl %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2104,18 +2102,37 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: notl %eax
+; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: notl %eax
+; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: notl %eax
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 %2 = icmp ugt <16 x i16> %a0, %1
 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
@@ -2164,13 +2181,11 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: notl %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2207,18 +2222,37 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: notl %eax
+; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: notl %eax
+; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: notl %eax
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 %2 = icmp ugt <32 x i16> %a0, %1
 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
@@ -2264,15 +2298,13 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: notb %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2306,20 +2338,43 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: notb %al
+; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: notb %al
+; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: notb %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 %2 = icmp ugt <32 x i8> %a0, %1
 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
@@ -2368,15 +2423,13 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: notb %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2410,20 +2463,43 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: notb %al
+; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: notb %al
+; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: notb %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 %2 = icmp ugt <64 x i8> %a0, %1
 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
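
The umax variants above need no constant pool: unsigned max is the complement of the unsigned min of complements, so the compiler materializes all-ones with vpcmpeqd, inverts with vpxor (a single vpternlogq $15 on AVX512), reduces with vphminposuw, and re-inverts the scalar with notl/notb. A standalone sketch of the identity (illustrative, not from the commit):

    // umax(A, B) == ~umin(~A, ~B) for unsigned 16-bit values.
    #include <cstdint>

    uint16_t umax_via_umin(uint16_t A, uint16_t B) {
      uint16_t NA = uint16_t(~A);       // vpcmpeqd (all-ones) + vpxor
      uint16_t NB = uint16_t(~B);
      uint16_t Min = NA < NB ? NA : NB; // vphminposuw
      return uint16_t(~Min);            // notl / notb on the scalar result
    }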

llvm/test/CodeGen/X86/horizontal-reduce-umin.ll

@@ -1922,12 +1922,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -1964,12 +1959,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
@@ -2019,12 +2009,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -2061,12 +2046,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
@@ -2113,14 +2093,9 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -2154,14 +2129,9 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
@@ -2211,14 +2181,9 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -2252,14 +2217,9 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
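
The umin diffs need no bias at all, since vphminposuw already computes an unsigned minimum; the shuffle ladders simply drop out. For the i8 cases, the pre-existing vpsrlw $8 + vpminub step is what makes a u16 minimum instruction usable, as in this per-lane sketch (illustrative, not from the commit):

    // After vpminub(X, X >> 8), each 16-bit lane holds min(lo, hi) in its low
    // byte and zero in its high byte (the high byte pairs with the shifted-in
    // zero), so vphminposuw over the lanes yields the overall byte minimum.
    #include <cstdint>

    uint16_t fold_lane_for_byte_umin(uint16_t Lane) {
      uint8_t Lo = uint8_t(Lane), Hi = uint8_t(Lane >> 8);
      uint8_t MinLoHi = Lo < Hi ? Lo : Hi; // the vpminub step, low byte
      return uint16_t(MinLoHi);            // high byte: min(Hi, 0) == 0
    }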