[AVX512] Enabling bit logic lowering

Added lowering tests.

llvm-svn: 224132
This commit is contained in:
Robert Khasanov 2014-12-12 17:02:18 +00:00
parent 665027dbc4
commit 37c3ad6c20
4 changed files with 247 additions and 0 deletions

View File

@ -1570,6 +1570,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SETCC, MVT::v4i1, Custom); setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom); setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
setOperationAction(ISD::XOR, MVT::v8i32, Legal);
setOperationAction(ISD::AND, MVT::v4i32, Legal);
setOperationAction(ISD::OR, MVT::v4i32, Legal);
setOperationAction(ISD::XOR, MVT::v4i32, Legal);
} }
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion

View File

@ -2922,6 +2922,7 @@ let isCodeGenOnly = 1 in {
/// ///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> { SDNode OpNode> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, !strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
@ -2952,6 +2953,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(loadv2i64 addr:$src2)))], 0>, (loadv2i64 addr:$src2)))], 0>,
PD, VEX_4V; PD, VEX_4V;
}
let Constraints = "$src1 = $dst" in { let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,

View File

@ -0,0 +1,101 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; CHECK-LABEL: vpandd
; CHECK: vpandd %zmm
; CHECK: ret
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%x = and <16 x i32> %a2, %b
ret <16 x i32> %x
}
; CHECK-LABEL: vpord
; CHECK: vpord %zmm
; CHECK: ret
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%x = or <16 x i32> %a2, %b
ret <16 x i32> %x
}
; CHECK-LABEL: vpxord
; CHECK: vpxord %zmm
; CHECK: ret
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%x = xor <16 x i32> %a2, %b
ret <16 x i32> %x
}
; CHECK-LABEL: vpandq
; CHECK: vpandq %zmm
; CHECK: ret
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%x = and <8 x i64> %a2, %b
ret <8 x i64> %x
}
; CHECK-LABEL: vporq
; CHECK: vporq %zmm
; CHECK: ret
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%x = or <8 x i64> %a2, %b
ret <8 x i64> %x
}
; CHECK-LABEL: vpxorq
; CHECK: vpxorq %zmm
; CHECK: ret
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%x = xor <8 x i64> %a2, %b
ret <8 x i64> %x
}
; CHECK-LABEL: orq_broadcast
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
%b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
ret <8 x i64> %b
}
; CHECK-LABEL: andd512fold
; CHECK: vpandd (%
; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
entry:
%a = load <16 x i32>* %x, align 4
%b = and <16 x i32> %y, %a
ret <16 x i32> %b
}
; CHECK-LABEL: andqbrst
; CHECK: vpandq (%rdi){1to8}, %zmm
; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
entry:
%a = load i64* %ap, align 8
%b = insertelement <8 x i64> undef, i64 %a, i32 0
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%d = and <8 x i64> %p1, %c
ret <8 x i64>%d
}

View File

@ -0,0 +1,137 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s
; 256-bit
; CHECK-LABEL: vpandd256
; CHECK: vpandd %ymm
; CHECK: ret
define <8 x i32> @vpandd256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%x = and <8 x i32> %a2, %b
ret <8 x i32> %x
}
; CHECK-LABEL: vpord256
; CHECK: vpord %ymm
; CHECK: ret
define <8 x i32> @vpord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%x = or <8 x i32> %a2, %b
ret <8 x i32> %x
}
; CHECK-LABEL: vpxord256
; CHECK: vpxord %ymm
; CHECK: ret
define <8 x i32> @vpxord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%x = xor <8 x i32> %a2, %b
ret <8 x i32> %x
}
; CHECK-LABEL: vpandq256
; CHECK: vpandq %ymm
; CHECK: ret
define <4 x i64> @vpandq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
%x = and <4 x i64> %a2, %b
ret <4 x i64> %x
}
; CHECK-LABEL: vporq256
; CHECK: vporq %ymm
; CHECK: ret
define <4 x i64> @vporq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
%x = or <4 x i64> %a2, %b
ret <4 x i64> %x
}
; CHECK-LABEL: vpxorq256
; CHECK: vpxorq %ymm
; CHECK: ret
define <4 x i64> @vpxorq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
%x = xor <4 x i64> %a2, %b
ret <4 x i64> %x
}
; 128-bit
; CHECK-LABEL: vpandd128
; CHECK: vpandd %xmm
; CHECK: ret
define <4 x i32> @vpandd128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
%x = and <4 x i32> %a2, %b
ret <4 x i32> %x
}
; CHECK-LABEL: vpord128
; CHECK: vpord %xmm
; CHECK: ret
define <4 x i32> @vpord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
%x = or <4 x i32> %a2, %b
ret <4 x i32> %x
}
; CHECK-LABEL: vpxord128
; CHECK: vpxord %xmm
; CHECK: ret
define <4 x i32> @vpxord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
%x = xor <4 x i32> %a2, %b
ret <4 x i32> %x
}
; CHECK-LABEL: vpandq128
; CHECK: vpandq %xmm
; CHECK: ret
define <2 x i64> @vpandq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
%x = and <2 x i64> %a2, %b
ret <2 x i64> %x
}
; CHECK-LABEL: vporq128
; CHECK: vporq %xmm
; CHECK: ret
define <2 x i64> @vporq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
%x = or <2 x i64> %a2, %b
ret <2 x i64> %x
}
; CHECK-LABEL: vpxorq128
; CHECK: vpxorq %xmm
; CHECK: ret
define <2 x i64> @vpxorq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
%x = xor <2 x i64> %a2, %b
ret <2 x i64> %x
}