[InstCombine] limit extracting shuffle transform based on uses

As discussed in D53037, this can lead to worse codegen, and we
don't generally expect the backend to be able to optimize
arbitrary shuffles. If there's only one use of the 1st shuffle,
that means it's getting removed, so that should always be
safe.

llvm-svn: 353235
This commit is contained in:
Sanjay Patel 2019-02-05 22:58:45 +00:00
parent 1b8df42712
commit cddb1e5469
2 changed files with 7 additions and 2 deletions

View File

@ -1498,6 +1498,11 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
return nullptr;
// Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
// then combining may result in worse codegen.
if (!Op0->hasOneUse())
return nullptr;
// We are extracting a subvector from a shuffle. Remove excess elements from
// the 1st shuffle mask to eliminate the extract.
//

View File

@ -195,7 +195,7 @@ define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8>
ret <4 x i8> %extract_subv
}
; FIXME: Extra uses are not ok - we should only do the transform when we can eliminate an instruction.
; Extra uses are not ok - we only do the transform when we can eliminate an instruction.
declare void @use_v5i8(<5 x i8>)
@ -203,7 +203,7 @@ define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y
; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]])
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> [[Y]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
;
%shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>