[AVX512] In some cases the KORTEST instruction may be used instead of a ZEXT + TEST sequence.

Differential Revision: http://reviews.llvm.org/D23490

llvm-svn: 279960
This commit is contained in:
Igor Breger 2016-08-29 08:52:52 +00:00
parent 407f275894
commit 1a388871b9
7 changed files with 296 additions and 728 deletions

View File

@ -14900,15 +14900,29 @@ static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1) {
/// Build the compare-against-zero node used to test an i1 value.
///
/// \param Op  the i1 value being tested.
/// \param DAG the selection DAG in which the new nodes are created.
/// \param dl  debug location attached to every node created here.
/// \returns an X86ISD::CMP node comparing the (possibly widened) value with 0.
static SDValue EmitTEST_i1(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) {
  switch (Op.getOpcode()) {
  case ISD::TRUNCATE:
  case ISD::LOAD:
  case ISD::CopyFromReg: {
    // For these producers the value most probably lives in a GPR, so widen it
    // to i8 and compare that against zero (ZEXT + CMP).
    SDValue Widened = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
    SDValue ZeroI8 = DAG.getConstant(0, dl, MVT::i8);
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Widened, ZeroI8);
  }
  default:
    break;
  }

  // Otherwise emit a compare directly on the i1 value; this form is the one
  // that should be mapped to KORTEST.
  SDValue Zero = DAG.getConstant(0, dl, MVT::i8);
  return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op, Zero);
}
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return EmitTEST_i1(Op, DAG, dl);
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;

View File

@ -2476,6 +2476,10 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
// Select an X86cmp of a VK1 mask value against 0 as KORTESTWrr: the VK1
// source is copied into the VK16 register class and passed as both operands.
// The pattern is only available when HasAVX512 is satisfied.
// NOTE(review): this presumably relies on the bits above bit 0 of the copied
// VK16 value being zero — confirm against the producers of VK1 values.
def : Pat<(X86cmp VK1:$src, 0),
(KORTESTWrr (COPY_TO_REGCLASS VK1:$src, VK16),
(COPY_TO_REGCLASS VK1:$src, VK16))>, Requires<[HasAVX512]>;
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode> {

View File

@ -167,9 +167,7 @@ define i32 @test10(i64 %b, i64 %c, i1 %d) {
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: korw %k1, %k0, %k1
; ALL-NEXT: kxorw %k1, %k0, %k0
; ALL-NEXT: kmovw %k0, %eax
; ALL-NEXT: andl $1, %eax
; ALL-NEXT: testb %al, %al
; ALL-NEXT: kortestw %k0, %k0
; ALL-NEXT: je LBB8_1
; ALL-NEXT: ## BB#2: ## %if.end.i
; ALL-NEXT: movl $6, %eax

View File

@ -8,23 +8,19 @@ target triple = "x86_64-unknown-linux-gnu"
define void @func() {
; CHECK-LABEL: func:
; CHECK: ## BB#0: ## %L_10
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: je LBB0_1
; CHECK-NEXT: ## BB#4: ## %L_30
; CHECK-NEXT: ## BB#3: ## %L_30
; CHECK-NEXT: retq
; CHECK-NEXT: LBB0_1: ## %bb56
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: jmp LBB0_2
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_3: ## %bb35
; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: LBB0_2: ## %bb33
; CHECK-NEXT: LBB0_2: ## %bb35
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: LBB0_1: ## %bb33
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: jmp LBB0_3
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: jmp LBB0_2
bb1:
br i1 undef, label %L_10, label %L_10

View File

@ -200,9 +200,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: je LBB10_2
; KNL-NEXT: ## BB#1: ## %A
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
@ -216,9 +214,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $11, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: je LBB10_2
; SKX-NEXT: ## BB#1: ## %A
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
@ -244,9 +240,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: retq
@ -258,9 +252,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
; SKX-NEXT: kunpckbw %k0, %k1, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: retq
@ -310,9 +302,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: retq
@ -322,9 +312,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT: kshiftlb $3, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: retq
@ -1356,9 +1344,7 @@ define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) {
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: addb $3, %al
; SKX-NEXT: movzbl %al, %eax
@ -1438,9 +1424,7 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) {
; SKX: ## BB#0:
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrq $63, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: addb $3, %al
; SKX-NEXT: movzbl %al, %eax

View File

@ -1648,38 +1648,32 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: # implicit-def: %XMM1
; SKX-NEXT: testb %al, %al
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: # implicit-def: %XMM0
; SKX-NEXT: je .LBB29_2
; SKX-NEXT: # BB#1: # %cond.load
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX-NEXT: .LBB29_2: # %else
; SKX-NEXT: kshiftlw $14, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: je .LBB29_4
; SKX-NEXT: # BB#3: # %cond.load1
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm1
; SKX-NEXT: vpextrq $1, %xmm1, %rax
; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
; SKX-NEXT: .LBB29_4: # %else2
; SKX-NEXT: kshiftlw $13, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: je .LBB29_6
; SKX-NEXT: # BB#5: # %cond.load4
; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm1
; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm1
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; SKX-NEXT: .LBB29_6: # %else5
; SKX-NEXT: vpblendmd %xmm1, %xmm3, %xmm0 {%k1}
; SKX-NEXT: vpblendmd %xmm0, %xmm3, %xmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test30:
@ -1692,38 +1686,32 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX_32-NEXT: kshiftlw $15, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: andl $1, %eax
; SKX_32-NEXT: # implicit-def: %XMM1
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; SKX_32-NEXT: kortestw %k0, %k0
; SKX_32-NEXT: # implicit-def: %XMM0
; SKX_32-NEXT: je .LBB29_2
; SKX_32-NEXT: # BB#1: # %cond.load
; SKX_32-NEXT: vmovd %xmm0, %eax
; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX_32-NEXT: vmovd %xmm1, %eax
; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX_32-NEXT: .LBB29_2: # %else
; SKX_32-NEXT: kshiftlw $14, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: andl $1, %eax
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: kortestw %k0, %k0
; SKX_32-NEXT: je .LBB29_4
; SKX_32-NEXT: # BB#3: # %cond.load1
; SKX_32-NEXT: vpextrd $1, %xmm0, %eax
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB29_4: # %else2
; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm2
; SKX_32-NEXT: kshiftlw $13, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: andl $1, %eax
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: kortestw %k0, %k0
; SKX_32-NEXT: je .LBB29_6
; SKX_32-NEXT: # BB#5: # %cond.load4
; SKX_32-NEXT: vpextrd $2, %xmm0, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB29_6: # %else5
; SKX_32-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; SKX_32-NEXT: vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: retl

File diff suppressed because it is too large Load Diff