[AVX512] Add popcount support for v32i16 and v64i8.
llvm-svn: 266858
This commit is contained in:
parent
580c1b6952
commit
99e60e9f1f
|
@ -1528,6 +1528,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::SRA, VT, Custom);
|
||||
setOperationAction(ISD::MLOAD, VT, Legal);
|
||||
setOperationAction(ISD::MSTORE, VT, Legal);
|
||||
setOperationAction(ISD::CTPOP, VT, Custom);
|
||||
|
||||
setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
|
||||
setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
|
||||
|
@ -20580,7 +20581,7 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL,
|
|||
int NumByteElts = VecSize / 8;
|
||||
MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
|
||||
SDValue In = DAG.getBitcast(ByteVecVT, Op);
|
||||
SmallVector<SDValue, 16> LUTVec;
|
||||
SmallVector<SDValue, 64> LUTVec;
|
||||
for (int i = 0; i < NumByteElts; ++i)
|
||||
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
|
||||
SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec);
|
||||
|
@ -20676,8 +20677,7 @@ static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL,
|
|||
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
// FIXME: Need to add AVX-512 support here!
|
||||
assert((VT.is256BitVector() || VT.is128BitVector()) &&
|
||||
assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
|
||||
"Unknown CTPOP type to handle");
|
||||
SDLoc DL(Op.getNode());
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
|
||||
|
||||
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
|
||||
; ALL-LABEL: testv8i64:
|
||||
|
@ -106,51 +107,78 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
|
|||
}
|
||||
|
||||
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
|
||||
; ALL-LABEL: testv32i16:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; ALL-NEXT: vpand %ymm2, %ymm0, %ymm3
|
||||
; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; ALL-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; ALL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
|
||||
; ALL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
|
||||
; ALL-NEXT: vpsllw $8, %ymm0, %ymm3
|
||||
; ALL-NEXT: vpaddb %ymm0, %ymm3, %ymm0
|
||||
; ALL-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; ALL-NEXT: vpand %ymm2, %ymm1, %ymm3
|
||||
; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; ALL-NEXT: vpshufb %ymm1, %ymm4, %ymm1
|
||||
; ALL-NEXT: vpaddb %ymm3, %ymm1, %ymm1
|
||||
; ALL-NEXT: vpsllw $8, %ymm1, %ymm2
|
||||
; ALL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
|
||||
; ALL-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; ALL-NEXT: retq
|
||||
; AVX512F-LABEL: testv32i16:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vpaddb %ymm0, %ymm3, %ymm0
|
||||
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3
|
||||
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsllw $8, %ymm1, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm1, %ymm2, %ymm1
|
||||
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: testv32i16:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
|
||||
ret <32 x i16> %out
|
||||
}
|
||||
|
||||
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
|
||||
; ALL-LABEL: testv64i8:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; ALL-NEXT: vpand %ymm2, %ymm0, %ymm3
|
||||
; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; ALL-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; ALL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
|
||||
; ALL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
|
||||
; ALL-NEXT: vpand %ymm2, %ymm1, %ymm3
|
||||
; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; ALL-NEXT: vpshufb %ymm1, %ymm4, %ymm1
|
||||
; ALL-NEXT: vpaddb %ymm3, %ymm1, %ymm1
|
||||
; ALL-NEXT: retq
|
||||
; AVX512F-LABEL: testv64i8:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3
|
||||
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
|
||||
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: testv64i8:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
|
||||
; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
|
||||
; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
|
||||
ret <64 x i8> %out
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue