diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f5fe32032648..b6a3dd2f370b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1412,9 +1412,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
     if (Subtarget.hasDQI()) {
-      setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
-
       setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
       setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
       setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
@@ -1708,6 +1705,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
     addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
 
+    setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
     setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
     setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9a7e34aa7b8a..0ad33b5c3cb1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2059,9 +2059,6 @@ let Predicates = [HasBWI] in {
                VEX, PD, VEX_W;
   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
                VEX, XD;
-}
-
-let Predicates = [HasBWI] in {
   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
                VEX, PS, VEX_W;
   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
@@ -2101,8 +2098,27 @@ let Predicates = [HasDQI] in {
             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
   def : Pat<(store VK2:$src, addr:$dst),
             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
+  def : Pat<(store VK1:$src, addr:$dst),
+            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
 }
 let Predicates = [HasAVX512, NoDQI] in {
+  def : Pat<(store VK1:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
+              sub_8bit))>;
+  def : Pat<(store VK2:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
+              sub_8bit))>;
+  def : Pat<(store VK4:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
+              sub_8bit))>;
+  def : Pat<(store VK8:$src, addr:$dst),
+            (MOV8mr addr:$dst,
+             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
+              sub_8bit))>;
+
   def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
             (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
@@ -2182,6 +2198,17 @@ def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
 def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
           (COPY_TO_REGCLASS VK1:$src, VK64)>;
 
+def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
+
+def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
+                           (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+
+def : Pat<(truncstorei1 GR8:$src, addr:$dst),
+          (MOV8mr addr:$dst, GR8:$src)>;
 
 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
 let Predicates = [HasAVX512, NoDQI] in {
@@ -6562,28 +6589,6 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
 
-def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
-
-def : Pat<(store VK1:$src, addr:$dst),
-          (MOV8mr addr:$dst,
-           (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
-            sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
-
-def : Pat<(store VK8:$src, addr:$dst),
-          (MOV8mr addr:$dst,
-           (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
-            sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
-
-def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
-                           (truncstore node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
-}]>;
-
-def : Pat<(truncstorei1 GR8:$src, addr:$dst),
-          (MOV8mr addr:$dst, GR8:$src)>;
-
 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
 def rr : AVX512XS8I
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ ... @@ define void @...(<2 x i1> %a, <2 x i1>* %addr) {
   store <2 x i1> %a, <2 x i1>* %addr
   ret void
 }
+
+define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
+; KNL-LABEL: store_v1i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    kmovw %edi, %k0
+; KNL-NEXT:    kxnorw %k0, %k0, %k1
+; KNL-NEXT:    kshiftrw $15, %k1, %k1
+; KNL-NEXT:    kxorw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    movb %al, (%rsi)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: store_v1i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    kmovw %edi, %k0
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
+; SKX-NEXT:    kshiftrw $15, %k1, %k1
+; SKX-NEXT:    kxorw %k1, %k0, %k0
+; SKX-NEXT:    kmovb %k0, (%rsi)
+; SKX-NEXT:    retq
+  %x = xor <1 x i1> %c, <i1 1>
+  store <1 x i1> %x, <1 x i1>* %ptr, align 4
+  ret void
+}
+
+define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
+; KNL-LABEL: store_v2i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movb %al, (%rdi)
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movb %al, (%rdi)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: store_v2i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT:    vpmovq2m %xmm0, %k0
+; SKX-NEXT:    knotw %k0, %k0
+; SKX-NEXT:    kmovb %k0, (%rdi)
+; SKX-NEXT:    retq
+  %x = xor <2 x i1> %c, <i1 1, i1 1>
+  store <2 x i1> %x, <2 x i1>* %ptr, align 4
+  ret void
+}
+
+define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
+; KNL-LABEL: store_v4i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpextrd $3, %xmm0, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movb %al, (%rdi)
+; KNL-NEXT:    vpextrd $2, %xmm0, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movb %al, (%rdi)
+; KNL-NEXT:    vpextrd $1, %xmm0, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movb %al, (%rdi)
+; KNL-NEXT:    vmovd %xmm0, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    movb %al, (%rdi)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: store_v4i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
+; SKX-NEXT:    vpmovd2m %xmm0, %k0
+; SKX-NEXT:    knotw %k0, %k0
+; SKX-NEXT:    kmovb %k0, (%rdi)
+; SKX-NEXT:    retq
+  %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
+  store <4 x i1> %x, <4 x i1>* %ptr, align 4
+  ret void
+}
+
+define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
+; KNL-LABEL: store_v8i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    knotw %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    movb %al, (%rdi)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: store_v8i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT:    vpmovw2m %xmm0, %k0
+; SKX-NEXT:    knotb %k0, %k0
+; SKX-NEXT:    kmovb %k0, (%rdi)
+; SKX-NEXT:    retq
+  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+  store <8 x i1> %x, <8 x i1>* %ptr, align 4
+  ret void
+}
+
+define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
+; KNL-LABEL: store_v16i1:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
+; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT:    knotw %k0, %k0
+; KNL-NEXT:    kmovw %k0, (%rdi)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: store_v16i1:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT:    vpmovb2m %xmm0, %k0
+; SKX-NEXT:    knotw %k0, %k0
+; SKX-NEXT:    kmovw %k0, (%rdi)
+; SKX-NEXT:    retq
+  %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+  store <16 x i1> %x, <16 x i1>* %ptr, align 4
+  ret void
+}
+
+;void f2(int);
+;void f1(int c)
+;{
+;  static int v = 0;
+;  if (v == 0)
+;    v = 1;
+;  else
+;    v = 0;
+;  f2(v);
+;}
+
+@f1.v = internal unnamed_addr global i1 false, align 4
+
+define void @f1(i32 %c) {
+; KNL-LABEL: f1:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    movzbl {{.*}}(%rip), %edi
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    movl %edi, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    kmovw %eax, %k0
+; KNL-NEXT:    kxnorw %k0, %k0, %k1
+; KNL-NEXT:    kshiftrw $15, %k1, %k1
+; KNL-NEXT:    kxorw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    movb %al, {{.*}}(%rip)
+; KNL-NEXT:    xorl $1, %edi
+; KNL-NEXT:    jmp _f2 ## TAILCALL
+;
+; SKX-LABEL: f1:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    movzbl {{.*}}(%rip), %edi
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    movl %edi, %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
+; SKX-NEXT:    kshiftrw $15, %k1, %k1
+; SKX-NEXT:    kxorw %k1, %k0, %k0
+; SKX-NEXT:    kmovb %k0, {{.*}}(%rip)
+; SKX-NEXT:    xorl $1, %edi
+; SKX-NEXT:    jmp _f2 ## TAILCALL
+entry:
+  %.b1 = load i1, i1* @f1.v, align 4
+  %not..b1 = xor i1 %.b1, true
+  store i1 %not..b1, i1* @f1.v, align 4
+  %0 = zext i1 %not..b1 to i32
+  tail call void @f2(i32 %0) #2
+  ret void
+}
+
+declare void @f2(i32) #1
+