[x86] fix uses check in broadcast transform (PR38949)
https://bugs.llvm.org/show_bug.cgi?id=38949

It's not clear to me that we even need a one-use check in this fold. I.e., 2 independent loads might be better than a load+dependent shuffle.

Note that the existing re-use tests are not affected. We actually do form a broadcast node in those tests now because there's no extra use of the insert_subvector node in those cases. But something later in isel pattern matching decides that it is not worth using a broadcast for the full load in those tests:

Legalized selection DAG: %bb.0 'test_broadcast_2f64_4f64_reuse:'
  t7: v2f64,ch = load<(load 16 from %ir.p0)> t0, t2, undef:i64
  t4: i64,ch = CopyFromReg t0, Register:i64 %1
  t10: ch = store<(store 16 into %ir.p1)> t7:1, t7, t4, undef:i64
  t18: v4f64 = insert_subvector undef:v4f64, t7, Constant:i64<0>
  t20: v4f64 = insert_subvector t18, t7, Constant:i64<2>

Becomes:
  t7: v2f64,ch = load<(load 16 from %ir.p0)> t0, t2, undef:i64
  t4: i64,ch = CopyFromReg t0, Register:i64 %1
  t10: ch = store<(store 16 into %ir.p1)> t7:1, t7, t4, undef:i64
  t21: v4f64 = X86ISD::SUBV_BROADCAST t7

ISEL: Starting selection on root node: t21: v4f64 = X86ISD::SUBV_BROADCAST t7
...
Created node: t27: v4f64 = INSERT_SUBREG IMPLICIT_DEF:v4f64, t7, TargetConstant:i32<7>
Morphed node: t21: v4f64 = VINSERTF128rr t27, t7, TargetConstant:i8<1>

llvm-svn: 342347
This commit is contained in:
parent
3e095174b0
commit
bfee5a9b42
|
@ -40114,11 +40114,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
|
|||
return Ld;
|
||||
}
|
||||
}
|
||||
// If lower/upper loads are the same and the only users of the load, then
|
||||
// lower to a VBROADCASTF128/VBROADCASTI128/etc.
|
||||
// If lower/upper loads are the same and there's no other use of the lower
|
||||
// load, then splat the loaded value with a broadcast.
|
||||
if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2)))
|
||||
if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
|
||||
SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode()))
|
||||
if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse())
|
||||
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
|
||||
|
||||
// If this is subv_broadcast insert into both halves, use a larger
|
||||
|
|
|
@ -106,16 +106,14 @@ define void @subv_reuse_is_ok(<4 x float>* %a, <8 x float>* %b) {
|
|||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: vmovups (%ecx), %xmm0
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
|
||||
; X32-NEXT: vmovups %ymm0, (%eax)
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: subv_reuse_is_ok:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vmovups (%rdi), %xmm0
|
||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
|
||||
; X64-NEXT: vmovups %ymm0, (%rsi)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue