[TargetLowering] Fix propagation of undefs in zero extension ops (PR40091)

As described in PR40091, we have several places where zext (and zext_vector_inreg) fold an undef input into an undef output. For zero extensions this is incorrect, as the output is guaranteed to at least have the new upper bits set to zero.

SimplifyDemandedVectorElts is the worst offender (and the most likely to cause new undefs to appear), but DAGCombiner's tryToFoldExtendOfConstant has a similar issue.
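To illustrate (a reduced IR example, not one of the committed tests): given

  %v = insertelement <2 x i8> undef, i8 -1, i32 1
  %z = zext <2 x i8> %v to <2 x i16>

it is not legal to fold element 0 of %z to a full 16-bit undef. Whatever value the undef source lane takes, bits 8-15 of every result element must be zero, so the safe fold for an undef source lane is the constant 0 rather than undef.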

Thanks to @dmgreen for catching this.

Differential Revision: https://reviews.llvm.org/D55883

llvm-svn: 349625
Simon Pilgrim
2018-12-19 13:37:59 +00:00
commit 6c95bea072 (parent f7cf1a1a73)
4 changed files with 34 additions and 15 deletions

@@ -8064,10 +8064,15 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   unsigned NumElts = VT.getVectorNumElements();
   SDLoc DL(N);
-  for (unsigned i=0; i != NumElts; ++i) {
-    SDValue Op = N0->getOperand(i);
-    if (Op->isUndef()) {
-      Elts.push_back(DAG.getUNDEF(SVT));
+  // For zero-extensions, UNDEF elements still guarantee to have the upper
+  // bits set to zero.
+  bool IsZext =
+      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    SDValue Op = N0.getOperand(i);
+    if (Op.isUndef()) {
+      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
       continue;
     }

@@ -1848,6 +1848,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
       return true;
     KnownZero = SrcZero.zextOrTrunc(NumElts);
     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+      // zext(undef) upper bits are guaranteed to be zero.
+      if (DemandedElts.isSubsetOf(KnownUndef))
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+      KnownUndef.clearAllBits();
+    }
     break;
   }
   case ISD::OR:
@@ -1892,6 +1899,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                    KnownZero, TLO, Depth + 1))
       return true;
+
+    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+      // zext(undef) upper bits are guaranteed to be zero.
+      if (DemandedElts.isSubsetOf(KnownUndef))
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+      KnownUndef.clearAllBits();
+    }
     break;
   default: {
     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {

@@ -4,7 +4,7 @@
 define i64 @test(i64 %aa) {
 ; CHECK-LABEL: test:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT:    movi v0.8b, #137
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
 entry:

@@ -261,12 +261,12 @@ define <4 x i64> @test_zext_4i8_4i64() {
 define <4 x i16> @test_zext_4i8_4i16_undef() {
 ; X32-LABEL: test_zext_4i8_4i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -279,12 +279,12 @@ define <4 x i16> @test_zext_4i8_4i16_undef() {
 define <4 x i32> @test_zext_4i8_4i32_undef() {
 ; X32-LABEL: test_zext_4i8_4i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 0, i32 0
   %2 = insertelement <4 x i8> %1, i8 undef, i32 1
@@ -297,12 +297,12 @@ define <4 x i32> @test_zext_4i8_4i32_undef() {
 define <4 x i64> @test_zext_4i8_4i64_undef() {
 ; X32-LABEL: test_zext_4i8_4i64_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,255,0,2,0,u,u>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i64_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,255,2,u>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,0]
 ; X64-NEXT:    retq
   %1 = insertelement <4 x i8> undef, i8 undef, i32 0
   %2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -359,12 +359,12 @@ define <8 x i32> @test_zext_8i8_8i32() {
 define <8 x i16> @test_zext_8i8_8i16_undef() {
 ; X32-LABEL: test_zext_8i8_8i16_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_8i8_8i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 undef, i32 0
   %2 = insertelement <8 x i8> %1, i8 -1, i32 1
@@ -381,12 +381,12 @@ define <8 x i16> @test_zext_8i8_8i16_undef() {
 define <8 x i32> @test_zext_8i8_8i32_undef() {
 ; X32-LABEL: test_zext_8i8_8i32_undef:
 ; X32:       # %bb.0:
-; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_8i8_8i32_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
 ; X64-NEXT:    retq
   %1 = insertelement <8 x i8> undef, i8 0, i32 0
   %2 = insertelement <8 x i8> %1, i8 undef, i32 1