AVX512: Add store mask patterns.

Differential Revision: http://reviews.llvm.org/D16596

llvm-svn: 258914
commit d6c187b038 (parent 0747c5c808)
Author: Igor Breger
Date: 2016-01-27 08:43:25 +00:00
3 changed files with 212 additions and 28 deletions
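
In IR terms, the change covers stores of scalar i1 and short vXi1 mask
values. A minimal sketch of the kind of input that should select the new
patterns (hypothetical example, not part of the commit):

  define void @store_mask8(<8 x i1> %m, <8 x i1>* %p) {
    store <8 x i1> %m, <8 x i1>* %p
    ret void
  }

With AVX512DQ this can be a single kmovb to memory; without DQ the mask is
moved through a GPR with kmovw and the low byte is stored with movb, as the
patterns and updated tests below show.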

lib/Target/X86/X86ISelLowering.cpp

@@ -1412,9 +1412,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
    if (Subtarget.hasDQI()) {
      setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
      setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
@@ -1708,6 +1705,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
    addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
    setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
    setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
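
The TRUNCATE hooks move out of the DQI-only block to the point where the
VK2/VK4 register classes are registered, so truncation to sub-byte mask
types is custom-lowered whenever those classes are available. A hypothetical
IR input that exercises this path on a VLX+DQI target such as SKX:

  define <2 x i1> @trunc_to_mask(<2 x i64> %a) {
    %t = trunc <2 x i64> %a to <2 x i1>
    ret <2 x i1> %t
  }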

lib/Target/X86/X86InstrAVX512.td

@@ -2059,9 +2059,6 @@ let Predicates = [HasBWI] in {
                 VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
}
let Predicates = [HasBWI] in {
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
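
kmovq handles 64-bit masks and is gated on AVX512BW. A hypothetical mask
store that should use it on a BWI target:

  define void @store_mask64(<64 x i1> %m, <64 x i1>* %p) {
    store <64 x i1> %m, <64 x i1>* %p
    ret void
  }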
@@ -2101,8 +2098,27 @@ let Predicates = [HasDQI] in {
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
}
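
With DQI, kmovb can store a mask register directly, so VK1/VK2/VK4 values
are first copied into VK8 and then stored with a single KMOVBmk. A
hypothetical function that should hit the new VK4 pattern on a DQI+VLX
target (compare the store_v4i1 SKX check lines below):

  define void @store_mask4(<4 x i32> %a, <4 x i32> %b, <4 x i1>* %p) {
    %m = icmp sgt <4 x i32> %a, %b
    store <4 x i1> %m, <4 x i1>* %p
    ret void
  }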
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
              sub_8bit))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
              sub_8bit))>;
  def : Pat<(store VK4:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
              sub_8bit))>;
  def : Pat<(store VK8:$src, addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit))>;
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
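
Without DQI there is no byte-sized mask store, so these patterns bounce the
mask through a GPR: KMOVWrk moves it to a 32-bit register and MOV8mr stores
the low byte. A hypothetical function that should match the VK8 variant on
KNL (compare the store_v8i1 KNL check lines below):

  define void @store_mask8_knl(<8 x i64> %a, <8 x i64> %b, <8 x i1>* %p) {
    %m = icmp sgt <8 x i64> %a, %b
    store <8 x i1> %m, <8 x i1>* %p
    ret void
  }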
@@ -2182,6 +2198,17 @@ def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
          (COPY_TO_REGCLASS VK1:$src, VK64)>;
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
}]>;
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;
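
truncstorei1 matches a truncating store whose memory type is i1; since an
i1 occupies a byte in memory, it can be lowered to an ordinary movb of the
source register. A hypothetical IR input that produces such a node once i1
is promoted to i8:

  define void @store_i1(i8 %v, i1* %p) {
    %b = trunc i8 %v to i1
    store i1 %b, i1* %p
    ret void
  }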
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512, NoDQI] in {
@@ -6562,28 +6589,6 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
def : Pat<(store VK1:$src, addr:$dst),
          (MOV8mr addr:$dst,
           (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
            sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
def : Pat<(store VK8:$src, addr:$dst),
          (MOV8mr addr:$dst,
           (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
            sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
}]>;
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),

test/CodeGen/X86/avx512-mask-op.ll

@@ -1442,3 +1442,183 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
store <2 x i1> %a, <2 x i1>* %addr
ret void
}
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
; KNL-LABEL: store_v1i1:
; KNL: ## BB#0:
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kxnorw %k0, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rsi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v1i1:
; SKX: ## BB#0:
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: kmovw %edi, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovb %k0, (%rsi)
; SKX-NEXT: retq
%x = xor <1 x i1> %c, <i1 1>
store <1 x i1> %x, <1 x i1>* %ptr, align 4
ret void
}
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL: ## BB#0:
; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v2i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX-NEXT: vpmovq2m %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
%x = xor <2 x i1> %c, <i1 1, i1 1>
store <2 x i1> %x, <2 x i1>* %ptr, align 4
ret void
}
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL: ## BB#0:
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpextrd $3, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vpextrd $2, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vpextrd $1, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: vmovd %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v4i1:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
%x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
store <4 x i1> %x, <4 x i1>* %ptr, align 4
ret void
}
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v8i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
%x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
store <8 x i1> %x, <8 x i1>* %ptr, align 4
ret void
}
define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
; KNL-LABEL: store_v16i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_v16i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovw %k0, (%rdi)
; SKX-NEXT: retq
%x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
store <16 x i1> %x, <16 x i1>* %ptr, align 4
ret void
}
;void f2(int);
;void f1(int c)
;{
; static int v = 0;
; if (v == 0)
; v = 1;
; else
; v = 0;
; f2(v);
;}
@f1.v = internal unnamed_addr global i1 false, align 4
define void @f1(i32 %c) {
; KNL-LABEL: f1:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: movzbl {{.*}}(%rip), %edi
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: movl %edi, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kxnorw %k0, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, {{.*}}(%rip)
; KNL-NEXT: xorl $1, %edi
; KNL-NEXT: jmp _f2 ## TAILCALL
;
; SKX-LABEL: f1:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: movzbl {{.*}}(%rip), %edi
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movl %edi, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovb %k0, {{.*}}(%rip)
; SKX-NEXT: xorl $1, %edi
; SKX-NEXT: jmp _f2 ## TAILCALL
entry:
%.b1 = load i1, i1* @f1.v, align 4
%not..b1 = xor i1 %.b1, true
store i1 %not..b1, i1* @f1.v, align 4
%0 = zext i1 %not..b1 to i32
tail call void @f2(i32 %0) #2
ret void
}
declare void @f2(i32) #1
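
The check lines above look machine-generated in the usual
update_llc_test_checks style; assuming the file's standard RUN lines, the
two configurations correspond to llc with -mcpu=knl (KNL prefix) and
-mcpu=skx (SKX prefix) on an x86_64-apple-darwin triple, with the output
piped to FileCheck using the matching --check-prefix.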