[SelectionDAG] When splitting scatter nodes during DAGCombine, create a serial chain dependency.

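A scatter can have multiple identical indices, so writes to the same address must stay in sequential order. When a scatter is split, the "Hi" half must therefore be chained after the "Lo" half rather than both halves hanging off the original chain and being joined by a TokenFactor. We get this right in LegalizeVectorTypes, but not in the split performed by DAGCombiner::visitMSCATTER.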

Differential Revision: https://reviews.llvm.org/D50374

llvm-svn: 339157
This commit is contained in:
Craig Topper 2018-08-07 17:35:02 +00:00
parent deb2899b2d
commit 49ed49fcb1
2 changed files with 25 additions and 27 deletions

View File

@ -7401,7 +7401,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
TargetLowering::TypeSplitVector)
return SDValue();
SDValue MaskLo, MaskHi, Lo, Hi;
SDValue MaskLo, MaskHi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
EVT LoVT, HiVT;
@ -7429,17 +7429,15 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
Alignment, MSC->getAAInfo(), MSC->getRanges());
SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
DataLo.getValueType(), DL, OpsLo, MMO);
SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {

View File

@ -2857,11 +2857,11 @@ define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32>
; KNL_64-LABEL: test_scatter_setcc_split:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vextractf64x4 $1, %zmm0, %ymm4
; KNL_64-NEXT: vptestnmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL_64-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; KNL_64-NEXT: vptestnmd %zmm5, %zmm5, %k1
; KNL_64-NEXT: vptestnmd %zmm1, %zmm1, %k2
; KNL_64-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k2}
; KNL_64-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k1}
; KNL_64-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
; KNL_64-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
@ -2877,11 +2877,11 @@ define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32>
; KNL_32-NEXT: vmovapd 72(%ebp), %zmm3
; KNL_32-NEXT: movl 8(%ebp), %eax
; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm4
; KNL_32-NEXT: vptestnmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL_32-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; KNL_32-NEXT: vptestnmd %zmm5, %zmm5, %k1
; KNL_32-NEXT: vptestnmd %zmm1, %zmm1, %k2
; KNL_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k2}
; KNL_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k1}
; KNL_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
; KNL_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k1}
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
@ -2891,11 +2891,11 @@ define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32>
; SKX-LABEL: test_scatter_setcc_split:
; SKX: # %bb.0:
; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm4
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; SKX-NEXT: vptestnmd %ymm5, %ymm5, %k1
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k2
; SKX-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k2}
; SKX-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k1}
; SKX-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
; SKX-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
@ -2911,11 +2911,11 @@ define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32>
; SKX_32-NEXT: vmovapd 72(%ebp), %zmm3
; SKX_32-NEXT: movl 8(%ebp), %eax
; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm4
; SKX_32-NEXT: vptestnmd %ymm1, %ymm1, %k1
; SKX_32-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; SKX_32-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; SKX_32-NEXT: vptestnmd %ymm5, %ymm5, %k1
; SKX_32-NEXT: vptestnmd %ymm1, %ymm1, %k2
; SKX_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k2}
; SKX_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k1}
; SKX_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
; SKX_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k1}
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .cfi_def_cfa %esp, 4