[X86] Redefine the 128-bit versions of VPGATHERQD and VGATHERQPS to use a VK2 mask instead of a VK4 mask.

This allows us to remove extra extend creation during lowering and more accurately reflects the semantics of the instruction.

While there, add an extra output VT to the X86 masked gather node to better match the isel pattern predicate. Currently we're exploiting the fact that the isel table doesn't check how many output results a node actually has, as long as their types can be inferred from the first result and the type constraints defined in tablegen. I think we might ultimately want to lower all MGATHER/MSCATTER to an X86ISD node with the extra mask result and stop relying on this hole in the isel checking.

llvm-svn: 318278
Craig Topper 2017-11-15 07:46:43 +00:00
parent 1c240a89ff
commit 16a91cee6c
3 changed files with 24 additions and 14 deletions
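
Background for the change (this sketch is not part of the commit): the 128-bit QD forms take only two 64-bit indices, so they load at most two elements and consult only mask bits 0 and 1, which is exactly what a VK2 mask models. Below is a minimal C example using the AVX-512VL intrinsic _mm_mmask_i64gather_epi32, which maps to VPGATHERQD. It is illustrative only and does not necessarily exercise the generic masked-gather lowering path changed below.

// Illustrative only -- assumes an AVX-512F + AVX-512VL target
// (compile with e.g. -mavx512f -mavx512vl).
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  int table[8] = {10, 11, 12, 13, 14, 15, 16, 17};

  __m128i idx = _mm_set_epi64x(5, 2);  // two 64-bit indices: lane 0 = 2, lane 1 = 5
  __m128i src = _mm_set1_epi32(-1);    // pass-through value for masked-off lanes
  __mmask8 k  = 0x3;                   // only mask bits 0 and 1 matter for this form

  // VPGATHERQD xmm {k}: loads table[2] and table[5] into the low two lanes.
  __m128i g = _mm_mmask_i64gather_epi32(src, k, idx, table, /*scale=*/4);

  int out[4];
  _mm_storeu_si128((__m128i *)out, g);
  printf("%d %d\n", out[0], out[1]);   // expected: 12 15
  return 0;
}

With k = 0x1 instead, lane 1 would keep the -1 pass-through value from src, the merge behavior described by the VK2WM write-mask class used in the new patterns.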


@@ -24368,11 +24368,10 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
     // The mask should match the destination type. Extending mask with zeroes
     // is not necessary since instruction itself reads only two values from
     // memory.
-    Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
     SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
     SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
-        DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
-        N->getMemOperand());
+        DAG.getVTList(MVT::v4i32, MVT::v2i1, MVT::Other), Ops, dl,
+        N->getMemoryVT(), N->getMemOperand());
     SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
                                   NewGather.getValue(0), DAG);
@@ -24392,16 +24391,16 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
         ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) &&
         Index.getOpcode() == ISD::CONCAT_VECTORS &&
         Index.getOperand(1).isUndef()) {
-      Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false);
+      Mask = Mask.getOperand(0);
       Index = Index.getOperand(0);
     } else
       return Op;
     SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
     SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
-        DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(),
-        N->getMemOperand());
-    SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) };
+        DAG.getVTList(MVT::v4f32, MVT::v2i1, MVT::Other), Ops, dl,
+        N->getMemoryVT(), N->getMemOperand());
+    SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(2) };
     return DAG.getMergeValues(RetOps, dl);
   }


@@ -8196,15 +8196,16 @@ defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
 // GATHER - SCATTER Operations
 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
-                         X86MemOperand memop, PatFrag GatherNode> {
+                         X86MemOperand memop, PatFrag GatherNode,
+                         RegisterClass MaskRC = _.KRCWM> {
   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
       ExeDomain = _.ExeDomain in
-  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
-            (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
+  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
+            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
             !strconcat(OpcodeStr#_.Suffix,
             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
-            [(set _.RC:$dst, _.KRCWM:$mask_wb,
-              (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
+            [(set _.RC:$dst, MaskRC:$mask_wb,
+              (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                           vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>;
 }
@@ -8241,7 +8242,8 @@ let Predicates = [HasVLX] in {
   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                           vx128xmem, mgatherv4i32>, EVEX_V128;
   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
-                                          vx64xmem, X86mgatherv2i64>, EVEX_V128;
+                                          vx64xmem, X86mgatherv2i64, VK2WM>,
+                                          EVEX_V128;
 }


@@ -751,6 +751,15 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
 def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
 def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+// Hack because we can't write a tablegen pattern that requires the type
+// of result 1 to be checked. So explicitly force the mask to v2i1.
+def X86masked_gatherv2i64 : SDNode<"X86ISD::MGATHER",
+                                   SDTypeProfile<2, 3, [SDTCisVec<0>,
+                                                        SDTCisVT<1, v2i1>,
+                                                        SDTCisSameAs<0, 2>,
+                                                        SDTCisSameAs<1, 3>,
+                                                        SDTCisPtrTy<4>]>,
+                                   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86masked_gather : SDNode<"X86ISD::MGATHER", SDTMaskedGather,
                               [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -778,7 +787,7 @@ def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
   return false;
 }]>;
 def X86mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-  (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
+  (X86masked_gatherv2i64 node:$src1, node:$src2, node:$src3) , [{
   if (X86MaskedGatherSDNode *Mgt = dyn_cast<X86MaskedGatherSDNode>(N))
     return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
             Mgt->getBasePtr().getValueType() == MVT::v2i64) &&