[x86] eliminate unnecessary vector compare for AVX masked store
The masked store instruction only cares about the sign-bit of each mask element, so the compare s<0 isn't needed. As noted in PR11210: https://bugs.llvm.org/show_bug.cgi?id=11210 ...fixing this should allow us to eliminate x86-specific masked store intrinsics in IR. (Although more testing will be needed to confirm that.) I filed a bug to track improvements for AVX512: https://bugs.llvm.org/show_bug.cgi?id=34584 Differential Revision: https://reviews.llvm.org/D37446 llvm-svn: 313089
This commit is contained in:
parent
079e746814
commit
659279450e
|
@ -33145,8 +33145,33 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
|
||||||
if (Mst->isCompressingStore())
|
if (Mst->isCompressingStore())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
if (!Mst->isTruncatingStore())
|
if (!Mst->isTruncatingStore()) {
|
||||||
return reduceMaskedStoreToScalarStore(Mst, DAG);
|
if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
|
||||||
|
return ScalarStore;
|
||||||
|
|
||||||
|
// If the mask is checking (0 > X), we're creating a vector with all-zeros
|
||||||
|
// or all-ones elements based on the sign bits of X. AVX1 masked store only
|
||||||
|
// cares about the sign bit of each mask element, so eliminate the compare:
|
||||||
|
// mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
|
||||||
|
// Note that by waiting to match an x86-specific PCMPGT node, we're
|
||||||
|
// eliminating potentially more complex matching of a setcc node which has
|
||||||
|
// a full range of predicates.
|
||||||
|
SDValue Mask = Mst->getMask();
|
||||||
|
if (Mask.getOpcode() == X86ISD::PCMPGT &&
|
||||||
|
ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
|
||||||
|
assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
|
||||||
|
"Unexpected type for PCMPGT");
|
||||||
|
return DAG.getMaskedStore(
|
||||||
|
Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
|
||||||
|
Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: AVX512 targets should also be able to simplify something like the
|
||||||
|
// pattern above, but that pattern will be different. It will either need to
|
||||||
|
// match setcc more generally or match PCMPGTM later (in tablegen?).
|
||||||
|
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
// Resolve truncating stores.
|
// Resolve truncating stores.
|
||||||
EVT VT = Mst->getValue().getValueType();
|
EVT VT = Mst->getValue().getValueType();
|
||||||
|
|
|
@ -1140,21 +1140,18 @@ define <8 x double> @load_one_mask_bit_set5(<8 x double>* %addr, <8 x double> %v
|
||||||
ret <8 x double> %res
|
ret <8 x double> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
|
; The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
|
||||||
|
; FIXME: The AVX512 code should be improved to use 'vpmovd2m'. Add tests for 512-bit vectors when implementing that.
|
||||||
|
|
||||||
define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x i32> %mask) {
|
define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x i32> %mask) {
|
||||||
; AVX-LABEL: trunc_mask:
|
; AVX-LABEL: trunc_mask:
|
||||||
; AVX: ## BB#0:
|
; AVX: ## BB#0:
|
||||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi)
|
||||||
; AVX-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
|
|
||||||
; AVX-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
|
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: trunc_mask:
|
; AVX512F-LABEL: trunc_mask:
|
||||||
; AVX512F: ## BB#0:
|
; AVX512F: ## BB#0:
|
||||||
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512F-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi)
|
||||||
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
|
|
||||||
; AVX512F-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
|
|
||||||
; AVX512F-NEXT: retq
|
; AVX512F-NEXT: retq
|
||||||
;
|
;
|
||||||
; SKX-LABEL: trunc_mask:
|
; SKX-LABEL: trunc_mask:
|
||||||
|
|
Loading…
Reference in New Issue