[SelectionDAG] Add FoldBUILD_VECTOR to simplify new BUILD_VECTOR nodes
Similar to FoldCONCAT_VECTORS, this patch adds FoldBUILD_VECTOR to simplify cases where creating the BUILD_VECTOR node can be avoided - if all the operands are UNDEF, or if the BUILD_VECTOR simplifies to a copy of an existing vector. This exposed an assumption in some AMDGPU code that getBuildVector was guaranteed to return a BUILD_VECTOR node, which I've tried to handle. Differential Revision: https://reviews.llvm.org/D53760 llvm-svn: 345578
This commit is contained in:
parent
4538ed3b85
commit
858303b827
|
@ -3805,6 +3805,38 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
|
||||||
return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
|
return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try to simplify a BUILD_VECTOR of \p Ops with result type \p VT so that
/// the BUILD_VECTOR node does not have to be created.
///
/// Two folds are attempted:
///  - every operand is UNDEF: the result is UNDEF of type \p VT;
///  - operand i is EXTRACT_VECTOR_ELT(Src, i) with a constant index, for a
///    single common vector Src whose type is \p VT: the result is Src itself.
///
/// \param DL  Debug location of the node being built (not used by the folds
///            below).
/// \param VT  Vector type of the BUILD_VECTOR being requested.
/// \param Ops Scalar operands, one per vector element; must not be empty.
/// \param DAG DAG used to materialise the UNDEF result.
/// \returns The simplified value, or an empty SDValue when no fold applies.
static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
                                ArrayRef<SDValue> Ops,
                                SelectionDAG &DAG) {
  // Keep the operand count unsigned so the comparison against the vector's
  // element count (an unsigned value in LLVM's EVT) and the loop below do not
  // mix signed and unsigned integers.
  unsigned NumOps = Ops.size();
  assert(NumOps != 0 && "Can't build an empty vector!");
  assert(VT.getVectorNumElements() == NumOps &&
         "Incorrect element count in BUILD_VECTOR!");

  // BUILD_VECTOR of UNDEFs is UNDEF.
  if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
    return DAG.getUNDEF(VT);

  // BUILD_VECTOR whose i-th operand extracts element i from one and the same
  // source vector of type VT is just that source vector.
  SDValue IdentitySrc;
  for (unsigned i = 0; i != NumOps; ++i) {
    const SDValue &Op = Ops[i];
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();

    // Every extract must read from the same vector, and that vector must
    // already have the requested result type.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType() != VT || (IdentitySrc && Src != IdentitySrc))
      return SDValue();

    // The extract index must be the constant i, i.e. the lane it fills.
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Idx || Idx->getAPIntValue() != i)
      return SDValue();

    IdentitySrc = Src;
  }

  // All lanes matched. NumOps is non-zero (asserted above), and a zero-length
  // operand list would already have returned UNDEF, so IdentitySrc is set.
  return IdentitySrc;
}
|
||||||
|
|
||||||
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
|
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
|
||||||
ArrayRef<SDValue> Ops,
|
ArrayRef<SDValue> Ops,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
|
@ -4059,6 +4091,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||||
case ISD::MERGE_VALUES:
|
case ISD::MERGE_VALUES:
|
||||||
case ISD::CONCAT_VECTORS:
|
case ISD::CONCAT_VECTORS:
|
||||||
return Operand; // Factor, merge or concat of one node? No need.
|
return Operand; // Factor, merge or concat of one node? No need.
|
||||||
|
case ISD::BUILD_VECTOR: {
|
||||||
|
// Attempt to simplify BUILD_VECTOR.
|
||||||
|
SDValue Ops[] = {Operand};
|
||||||
|
if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
|
||||||
|
return V;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
|
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
|
||||||
case ISD::FP_EXTEND:
|
case ISD::FP_EXTEND:
|
||||||
assert(VT.isFloatingPoint() &&
|
assert(VT.isFloatingPoint() &&
|
||||||
|
@ -4548,6 +4587,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||||
if (N2.getOpcode() == ISD::EntryToken) return N1;
|
if (N2.getOpcode() == ISD::EntryToken) return N1;
|
||||||
if (N1 == N2) return N1;
|
if (N1 == N2) return N1;
|
||||||
break;
|
break;
|
||||||
|
case ISD::BUILD_VECTOR: {
|
||||||
|
// Attempt to simplify BUILD_VECTOR.
|
||||||
|
SDValue Ops[] = {N1, N2};
|
||||||
|
if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
|
||||||
|
return V;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case ISD::CONCAT_VECTORS: {
|
case ISD::CONCAT_VECTORS: {
|
||||||
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
||||||
SDValue Ops[] = {N1, N2};
|
SDValue Ops[] = {N1, N2};
|
||||||
|
@ -5019,6 +5065,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case ISD::BUILD_VECTOR: {
|
||||||
|
// Attempt to simplify BUILD_VECTOR.
|
||||||
|
SDValue Ops[] = {N1, N2, N3};
|
||||||
|
if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
|
||||||
|
return V;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case ISD::CONCAT_VECTORS: {
|
case ISD::CONCAT_VECTORS: {
|
||||||
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
||||||
SDValue Ops[] = {N1, N2, N3};
|
SDValue Ops[] = {N1, N2, N3};
|
||||||
|
@ -6788,6 +6841,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||||
|
|
||||||
switch (Opcode) {
|
switch (Opcode) {
|
||||||
default: break;
|
default: break;
|
||||||
|
case ISD::BUILD_VECTOR:
|
||||||
|
// Attempt to simplify BUILD_VECTOR.
|
||||||
|
if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
|
||||||
|
return V;
|
||||||
|
break;
|
||||||
case ISD::CONCAT_VECTORS:
|
case ISD::CONCAT_VECTORS:
|
||||||
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
|
||||||
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
|
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
|
||||||
|
|
|
@ -1685,14 +1685,15 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
||||||
static SDValue CompactSwizzlableVector(
|
static SDValue CompactSwizzlableVector(
|
||||||
SelectionDAG &DAG, SDValue VectorEntry,
|
SelectionDAG &DAG, SDValue VectorEntry,
|
||||||
DenseMap<unsigned, unsigned> &RemapSwizzle) {
|
DenseMap<unsigned, unsigned> &RemapSwizzle) {
|
||||||
assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
|
|
||||||
assert(RemapSwizzle.empty());
|
assert(RemapSwizzle.empty());
|
||||||
SDValue NewBldVec[4] = {
|
|
||||||
VectorEntry.getOperand(0),
|
SDLoc DL(VectorEntry);
|
||||||
VectorEntry.getOperand(1),
|
EVT EltTy = VectorEntry.getValueType().getVectorElementType();
|
||||||
VectorEntry.getOperand(2),
|
|
||||||
VectorEntry.getOperand(3)
|
SDValue NewBldVec[4];
|
||||||
};
|
for (unsigned i = 0; i < 4; i++)
|
||||||
|
NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
|
||||||
|
DAG.getIntPtrConstant(i, DL));
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
if (NewBldVec[i].isUndef())
|
if (NewBldVec[i].isUndef())
|
||||||
|
@ -1727,15 +1728,17 @@ static SDValue CompactSwizzlableVector(
|
||||||
|
|
||||||
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
|
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
|
||||||
DenseMap<unsigned, unsigned> &RemapSwizzle) {
|
DenseMap<unsigned, unsigned> &RemapSwizzle) {
|
||||||
assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
|
|
||||||
assert(RemapSwizzle.empty());
|
assert(RemapSwizzle.empty());
|
||||||
SDValue NewBldVec[4] = {
|
|
||||||
VectorEntry.getOperand(0),
|
SDLoc DL(VectorEntry);
|
||||||
VectorEntry.getOperand(1),
|
EVT EltTy = VectorEntry.getValueType().getVectorElementType();
|
||||||
VectorEntry.getOperand(2),
|
|
||||||
VectorEntry.getOperand(3)
|
SDValue NewBldVec[4];
|
||||||
};
|
bool isUnmovable[4] = {false, false, false, false};
|
||||||
bool isUnmovable[4] = { false, false, false, false };
|
for (unsigned i = 0; i < 4; i++)
|
||||||
|
NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
|
||||||
|
DAG.getIntPtrConstant(i, DL));
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
RemapSwizzle[i] = i;
|
RemapSwizzle[i] = i;
|
||||||
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
|
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
|
||||||
|
@ -1766,7 +1769,6 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
|
||||||
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
|
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
|
||||||
SelectionDAG &DAG,
|
SelectionDAG &DAG,
|
||||||
const SDLoc &DL) const {
|
const SDLoc &DL) const {
|
||||||
assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
|
|
||||||
// Old -> New swizzle values
|
// Old -> New swizzle values
|
||||||
DenseMap<unsigned, unsigned> SwizzleRemap;
|
DenseMap<unsigned, unsigned> SwizzleRemap;
|
||||||
|
|
||||||
|
|
|
@ -96,8 +96,8 @@ entry:
|
||||||
; GFX9-NOT: m0
|
; GFX9-NOT: m0
|
||||||
; SICIVI: s_mov_b32 m0
|
; SICIVI: s_mov_b32 m0
|
||||||
|
|
||||||
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}}
|
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
|
||||||
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
|
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
|
||||||
|
|
||||||
|
|
||||||
; EG: LDS_READ_RET
|
; EG: LDS_READ_RET
|
||||||
|
|
|
@ -296,59 +296,10 @@ define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
|
define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
|
||||||
; SSE2-LABEL: _clearupper16xi8a:
|
; SSE-LABEL: _clearupper16xi8a:
|
||||||
; SSE2: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
; SSE-NEXT: retq
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm3
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
|
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm2
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm2
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm4
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
|
|
||||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
||||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
|
||||||
; SSE2-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE42-LABEL: _clearupper16xi8a:
|
|
||||||
; SSE42: # %bb.0:
|
|
||||||
; SSE42-NEXT: andps {{.*}}(%rip), %xmm0
|
|
||||||
; SSE42-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX-LABEL: _clearupper16xi8a:
|
; AVX-LABEL: _clearupper16xi8a:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
|
@ -422,107 +373,12 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
|
define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
|
||||||
; SSE2-LABEL: _clearupper32xi8a:
|
; SSE-LABEL: _clearupper32xi8a:
|
||||||
; SSE2: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||||
; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
; SSE-NEXT: andps %xmm2, %xmm0
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
; SSE-NEXT: andps %xmm2, %xmm1
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
; SSE-NEXT: retq
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm3
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
|
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm2
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm2
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm4
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
|
|
||||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
||||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
|
||||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm3
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm5
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
|
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm4
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm4
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
|
|
||||||
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm6
|
|
||||||
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
||||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
|
|
||||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
|
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
|
|
||||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
|
||||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE42-LABEL: _clearupper32xi8a:
|
|
||||||
; SSE42: # %bb.0:
|
|
||||||
; SSE42-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
|
||||||
; SSE42-NEXT: andps %xmm2, %xmm0
|
|
||||||
; SSE42-NEXT: andps %xmm2, %xmm1
|
|
||||||
; SSE42-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX-LABEL: _clearupper32xi8a:
|
; AVX-LABEL: _clearupper32xi8a:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
|
|
|
@ -234,8 +234,6 @@ define <8 x i32> @test10(<8 x i32>* %a) {
|
||||||
; SSE-LABEL: test10:
|
; SSE-LABEL: test10:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movdqa (%rdi), %xmm0
|
; SSE-NEXT: movdqa (%rdi), %xmm0
|
||||||
; SSE-NEXT: movdqa 16(%rdi), %xmm1
|
|
||||||
; SSE-NEXT: psrad %xmm0, %xmm1
|
|
||||||
; SSE-NEXT: psrad $1, %xmm0
|
; SSE-NEXT: psrad $1, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue