Make sure DAGCombiner doesn't introduce multiple loads from the same memory location. PR10747, part 2.

llvm-svn: 147283
This commit is contained in:
Eli Friedman 2011-12-26 22:49:32 +00:00
parent 2b081c2c9e
commit e96286cdf2
3 changed files with 44 additions and 6 deletions

View File

@ -6905,6 +6905,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
EVT LVT = ExtVT; EVT LVT = ExtVT;
if (InVec.getOpcode() == ISD::BITCAST) { if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType(); EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue(); return SDValue();
@ -6922,12 +6926,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
InVec.getOperand(0).getValueType() == ExtVT && InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) { ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
LN0 = cast<LoadSDNode>(InVec.getOperand(0)); LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// => // =>
// (load $addr+1*size) // (load $addr+1*size)
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
// If the bit convert changed the number of elements, it is unsafe // If the bit convert changed the number of elements, it is unsafe
// to examine the mask. // to examine the mask.
if (BCNumEltsChanged) if (BCNumEltsChanged)
@ -6938,14 +6950,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
if (InVec.getOpcode() == ISD::BITCAST) if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
InVec = InVec.getOperand(0); InVec = InVec.getOperand(0);
}
if (ISD::isNormalLoad(InVec.getNode())) { if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec); LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
} }
} }
// Make sure we found a non-volatile load and the extractelement is
// the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue(); return SDValue();
@ -6982,6 +7001,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// The replacement we need to do here is a little tricky: we need to // The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load. // replace an extractelement of a load with a load.
// Use ReplaceAllUsesOfValuesWith to do the replacement. // Use ReplaceAllUsesOfValuesWith to do the replacement.
// Note that this replacement assumes that the extractvalue is the only
// use of the load; that's okay because we don't want to perform this
// transformation in other cases anyway.
SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(), LN0->isVolatile(), LN0->isNonTemporal(),

View File

@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
; CHECK: movw r1, :lower16:{{.*}} ; CHECK: movw r1, :lower16:{{.*}}
; CHECK: movt r1, :upper16:{{.*}} ; CHECK: movt r1, :upper16:{{.*}}
; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]} ; CHECK: vldmia r1
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]] ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]] ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]] ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]] ; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
; CHECK: vstmia {{.*}} ; CHECK: vstmia {{.*}}
L.entry: L.entry:

View File

@ -0,0 +1,16 @@
; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
; Make sure we don't load from the location pointed to by %p
; twice: it has non-obvious performance implications, and
; the relevant transformation doesn't know how to update
; the chains correctly.
; PR10747
; CHECK: test:
; CHECK: pextrd $2, %xmm
define <4 x i32> @test(<4 x i32>* %p) {
%v = load <4 x i32>* %p
%e = extractelement <4 x i32> %v, i32 2
%cmp = icmp eq i32 %e, 3
%sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
ret <4 x i32> %sel
}