[X86][SSE] Lower 128-bit MOVDDUP with existing VBROADCAST mechanisms
We have a number of useful lowering strategies for VBROADCAST instructions (both from memory and from register element 0) which the 128-bit form of the MOVDDUP instruction can also make use of. This patch tweaks lowerVectorShuffleAsBroadcast to enable it to broadcast v2f64 args using MOVDDUP as well. This requires a slight change to the lowerVectorShuffleAsBroadcast mechanism, because the existing MOVDDUP lowering uses isShuffleEquivalent, which can match binary shuffles that can be lowered to (unary) broadcasts. Differential Revision: http://reviews.llvm.org/D17680 — llvm-svn: 262478
This commit is contained in:
parent
f2fbabe9c1
commit
c02b72627a
|
@ -8359,29 +8359,40 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0,
|
||||||
/// filtering. While a little annoying to re-dispatch on type here, there isn't
|
/// filtering. While a little annoying to re-dispatch on type here, there isn't
|
||||||
/// a convenient way to factor it out.
|
/// a convenient way to factor it out.
|
||||||
/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them?
|
/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them?
|
||||||
static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
|
static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V1,
|
||||||
ArrayRef<int> Mask,
|
SDValue V2, ArrayRef<int> Mask,
|
||||||
const X86Subtarget &Subtarget,
|
const X86Subtarget &Subtarget,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
if (!Subtarget.hasAVX())
|
if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
|
||||||
return SDValue();
|
(Subtarget.hasAVX() && VT.isFloatingPoint()) ||
|
||||||
if (VT.isInteger() && !Subtarget.hasAVX2())
|
(Subtarget.hasAVX2() && VT.isInteger())))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
|
||||||
|
// we can only broadcast from a register with AVX2.
|
||||||
|
unsigned NumElts = Mask.size();
|
||||||
|
unsigned Opcode = VT == MVT::v2f64 ? X86ISD::MOVDDUP : X86ISD::VBROADCAST;
|
||||||
|
bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();
|
||||||
|
|
||||||
// Check that the mask is a broadcast.
|
// Check that the mask is a broadcast.
|
||||||
int BroadcastIdx = -1;
|
int BroadcastIdx = -1;
|
||||||
for (int M : Mask)
|
for (int i = 0; i != (int)NumElts; ++i) {
|
||||||
if (M >= 0 && BroadcastIdx == -1)
|
SmallVector<int, 8> BroadcastMask(NumElts, i);
|
||||||
BroadcastIdx = M;
|
if (isShuffleEquivalent(V1, V2, Mask, BroadcastMask)) {
|
||||||
else if (M >= 0 && M != BroadcastIdx)
|
BroadcastIdx = i;
|
||||||
return SDValue();
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (BroadcastIdx < 0)
|
||||||
|
return SDValue();
|
||||||
assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
|
assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
|
||||||
"a sorted mask where the broadcast "
|
"a sorted mask where the broadcast "
|
||||||
"comes from V1.");
|
"comes from V1.");
|
||||||
|
|
||||||
// Go up the chain of (vector) values to find a scalar load that we can
|
// Go up the chain of (vector) values to find a scalar load that we can
|
||||||
// combine with the broadcast.
|
// combine with the broadcast.
|
||||||
|
SDValue V = V1;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
switch (V.getOpcode()) {
|
switch (V.getOpcode()) {
|
||||||
case ISD::CONCAT_VECTORS: {
|
case ISD::CONCAT_VECTORS: {
|
||||||
|
@ -8434,9 +8445,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
|
||||||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
|
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
|
||||||
V = V.getOperand(BroadcastIdx);
|
V = V.getOperand(BroadcastIdx);
|
||||||
|
|
||||||
// If the scalar isn't a load, we can't broadcast from it in AVX1.
|
// If we can't broadcast from a register, check that the input is a load.
|
||||||
// Only AVX2 has register broadcasts.
|
if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
|
||||||
if (!Subtarget.hasAVX2() && !isShuffleFoldableLoad(V))
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
} else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
|
} else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
|
||||||
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
|
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
|
||||||
|
@ -8453,8 +8463,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
|
||||||
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
|
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
|
||||||
DAG.getMachineFunction().getMachineMemOperand(
|
DAG.getMachineFunction().getMachineMemOperand(
|
||||||
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
|
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
|
||||||
} else if (!Subtarget.hasAVX2()) {
|
} else if (!BroadcastFromReg) {
|
||||||
// We can't broadcast from a vector register without AVX2.
|
// We can't broadcast from a vector register.
|
||||||
return SDValue();
|
return SDValue();
|
||||||
} else if (BroadcastIdx != 0) {
|
} else if (BroadcastIdx != 0) {
|
||||||
// We can only broadcast from the zero-element of a vector register,
|
// We can only broadcast from the zero-element of a vector register,
|
||||||
|
@ -8477,8 +8487,10 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
|
||||||
DAG.getIntPtrConstant(BroadcastIdx, DL));
|
DAG.getIntPtrConstant(BroadcastIdx, DL));
|
||||||
}
|
}
|
||||||
|
|
||||||
V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V);
|
if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector())
|
||||||
return DAG.getBitcast(VT, V);
|
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V);
|
||||||
|
|
||||||
|
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for whether we can use INSERTPS to perform the shuffle. We only use
|
// Check for whether we can use INSERTPS to perform the shuffle. We only use
|
||||||
|
@ -8694,10 +8706,10 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
|
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
|
||||||
|
|
||||||
if (isSingleInputShuffleMask(Mask)) {
|
if (isSingleInputShuffleMask(Mask)) {
|
||||||
// Use low duplicate instructions for masks that match their pattern.
|
// Check for being able to broadcast a single element.
|
||||||
if (Subtarget.hasSSE3())
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
|
||||||
if (isShuffleEquivalent(V1, V2, Mask, {0, 0}))
|
DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
|
||||||
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
|
return Broadcast;
|
||||||
|
|
||||||
// Straight shuffle of a single input vector. Simulate this by using the
|
// Straight shuffle of a single input vector. Simulate this by using the
|
||||||
// single input as both of the "inputs" to this instruction..
|
// single input as both of the "inputs" to this instruction..
|
||||||
|
@ -8776,8 +8788,8 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
|
|
||||||
if (isSingleInputShuffleMask(Mask)) {
|
if (isSingleInputShuffleMask(Mask)) {
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v2i64, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
|
||||||
Mask, Subtarget, DAG))
|
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
// Straight shuffle of a single input vector. For everything from SSE2
|
// Straight shuffle of a single input vector. For everything from SSE2
|
||||||
|
@ -8998,8 +9010,8 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
|
|
||||||
if (NumV2Elements == 0) {
|
if (NumV2Elements == 0) {
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4f32, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
|
||||||
Mask, Subtarget, DAG))
|
DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
// Use even/odd duplicate instructions for masks that match their pattern.
|
// Use even/odd duplicate instructions for masks that match their pattern.
|
||||||
|
@ -9090,8 +9102,8 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
|
|
||||||
if (NumV2Elements == 0) {
|
if (NumV2Elements == 0) {
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i32, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
|
||||||
Mask, Subtarget, DAG))
|
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
// Straight shuffle of a single input vector. For everything from SSE2
|
// Straight shuffle of a single input vector. For everything from SSE2
|
||||||
|
@ -9730,8 +9742,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
|
|
||||||
if (NumV2Inputs == 0) {
|
if (NumV2Inputs == 0) {
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i16, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
|
||||||
Mask, Subtarget, DAG))
|
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
// Try to use shift instructions.
|
// Try to use shift instructions.
|
||||||
|
@ -9929,8 +9941,8 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
// For single-input shuffles, there are some nicer lowering tricks we can use.
|
// For single-input shuffles, there are some nicer lowering tricks we can use.
|
||||||
if (NumV2Elements == 0) {
|
if (NumV2Elements == 0) {
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i8, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
|
||||||
Mask, Subtarget, DAG))
|
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
|
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
|
||||||
|
@ -10982,8 +10994,8 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
|
|
||||||
if (isSingleInputShuffleMask(Mask)) {
|
if (isSingleInputShuffleMask(Mask)) {
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4f64, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
|
||||||
Mask, Subtarget, DAG))
|
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
// Use low duplicate instructions for masks that match their pattern.
|
// Use low duplicate instructions for masks that match their pattern.
|
||||||
|
@ -11080,7 +11092,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
return Blend;
|
return Blend;
|
||||||
|
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, V2,
|
||||||
Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
|
@ -11153,7 +11165,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
return Blend;
|
return Blend;
|
||||||
|
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, V2,
|
||||||
Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
|
@ -11259,7 +11271,7 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
return Blend;
|
return Blend;
|
||||||
|
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, V2,
|
||||||
Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
|
@ -11341,7 +11353,7 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
return ZExt;
|
return ZExt;
|
||||||
|
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, V2,
|
||||||
Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
|
@ -11438,7 +11450,7 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
return ZExt;
|
return ZExt;
|
||||||
|
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1,
|
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, V2,
|
||||||
Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
|
@ -11748,7 +11760,7 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
|
|
||||||
// Check for being able to broadcast a single element.
|
// Check for being able to broadcast a single element.
|
||||||
if (SDValue Broadcast =
|
if (SDValue Broadcast =
|
||||||
lowerVectorShuffleAsBroadcast(DL, VT, V1, Mask, Subtarget, DAG))
|
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Broadcast;
|
return Broadcast;
|
||||||
|
|
||||||
// Dispatch to each element type for lowering. If we don't have support for
|
// Dispatch to each element type for lowering. If we don't have support for
|
||||||
|
|
|
@ -135,8 +135,7 @@ entry:
|
||||||
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
|
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
|
||||||
; CHECK-LABEL: splat_load_2f64_11:
|
; CHECK-LABEL: splat_load_2f64_11:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; CHECK-NEXT: vmovaps (%rdi), %xmm0
|
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%x = load <2 x double>, <2 x double>* %ptr
|
%x = load <2 x double>, <2 x double>* %ptr
|
||||||
%x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
|
%x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
|
||||||
|
|
|
@ -315,14 +315,12 @@ define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwta
|
||||||
; X32-LABEL: load_splat_2f64_2f64_1111:
|
; X32-LABEL: load_splat_2f64_2f64_1111:
|
||||||
; X32: ## BB#0: ## %entry
|
; X32: ## BB#0: ## %entry
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: vmovaps (%eax), %xmm0
|
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: load_splat_2f64_2f64_1111:
|
; X64-LABEL: load_splat_2f64_2f64_1111:
|
||||||
; X64: ## BB#0: ## %entry
|
; X64: ## BB#0: ## %entry
|
||||||
; X64-NEXT: vmovaps (%rdi), %xmm0
|
; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%ld = load <2 x double>, <2 x double>* %ptr
|
%ld = load <2 x double>, <2 x double>* %ptr
|
||||||
|
|
|
@ -494,14 +494,12 @@ define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwta
|
||||||
; X32-LABEL: load_splat_2f64_2f64_1111:
|
; X32-LABEL: load_splat_2f64_2f64_1111:
|
||||||
; X32: ## BB#0: ## %entry
|
; X32: ## BB#0: ## %entry
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: vmovaps (%eax), %xmm0
|
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: load_splat_2f64_2f64_1111:
|
; X64-LABEL: load_splat_2f64_2f64_1111:
|
||||||
; X64: ## BB#0: ## %entry
|
; X64: ## BB#0: ## %entry
|
||||||
; X64-NEXT: vmovaps (%rdi), %xmm0
|
; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%ld = load <2 x double>, <2 x double>* %ptr
|
%ld = load <2 x double>, <2 x double>* %ptr
|
||||||
|
|
|
@ -63,13 +63,13 @@ define void @t3() {
|
||||||
;
|
;
|
||||||
; X64-SSSE3-LABEL: t3:
|
; X64-SSSE3-LABEL: t3:
|
||||||
; X64-SSSE3: # BB#0: # %bb
|
; X64-SSSE3: # BB#0: # %bb
|
||||||
; X64-SSSE3-NEXT: movupd (%rax), %xmm0
|
; X64-SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; X64-SSSE3-NEXT: movhpd %xmm0, (%rax)
|
; X64-SSSE3-NEXT: movlpd %xmm0, (%rax)
|
||||||
;
|
;
|
||||||
; X64-AVX-LABEL: t3:
|
; X64-AVX-LABEL: t3:
|
||||||
; X64-AVX: # BB#0: # %bb
|
; X64-AVX: # BB#0: # %bb
|
||||||
; X64-AVX-NEXT: vmovupd (%rax), %xmm0
|
; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; X64-AVX-NEXT: vmovhpd %xmm0, (%rax)
|
; X64-AVX-NEXT: vmovlpd %xmm0, (%rax)
|
||||||
bb:
|
bb:
|
||||||
%tmp13 = load <2 x double>, <2 x double>* undef, align 1
|
%tmp13 = load <2 x double>, <2 x double>* undef, align 1
|
||||||
%.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1
|
%.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1
|
||||||
|
|
|
@ -20,8 +20,7 @@ define float @t2(<4 x float>* %P1) nounwind {
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: pushl %eax
|
; CHECK-NEXT: pushl %eax
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: movapd (%eax), %xmm0
|
; CHECK-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||||
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
||||||
; CHECK-NEXT: movss %xmm0, (%esp)
|
; CHECK-NEXT: movss %xmm0, (%esp)
|
||||||
; CHECK-NEXT: flds (%esp)
|
; CHECK-NEXT: flds (%esp)
|
||||||
; CHECK-NEXT: popl %eax
|
; CHECK-NEXT: popl %eax
|
||||||
|
|
Loading…
Reference in New Issue