diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6a4333704120..e774c138d281 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1381,6 +1381,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (add x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; } // fold (add x, undef) -> undef @@ -1624,6 +1630,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (sub x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; } // fold (sub x, x) -> 0 diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll index 6cd27618de1d..2bde76efd2ae 100644 --- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -43,11 +43,11 @@ forbody: ; preds = %forcond %mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1] %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1] %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1] - %andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %andnps178.i = add <4 x i32> %bitcast176.i, ; <<4 x i32>> [#uses=1] %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1] %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1] %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1] - %andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1] + %andnps192.i = add <4 x i32> %bitcast190.i, ; <<4 x i32>> [#uses=1] %xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %orps203.i = add <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1] %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1] @@ -55,9 +55,9 @@ forbody: ; preds = %forcond %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] - %andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1] + %andps.i14 = add <4 x i32> , %bitcast6.i13 ; <<4 x i32>> [#uses=1] %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %andnps.i17 = add <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1] + %andnps.i17 = add <4 x i32> , %not.i16 ; <<4 x i32>> [#uses=1] %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/vec_zero.ll b/llvm/test/CodeGen/X86/vec_zero.ll index 682a0dfca806..c3ea0ad2023f 100644 --- a/llvm/test/CodeGen/X86/vec_zero.ll +++ b/llvm/test/CodeGen/X86/vec_zero.ll @@ -13,7 +13,7 @@ define void @foo(<4 x float>* %P) { ; CHECK: pxor define void @bar(<4 x i32>* %P) { %T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1] - %S = add <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1] + %S = sub <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1] store <4 x i32> %S, <4 x i32>* %P ret void }