[mips][msa] Pattern match the splat.d instruction
Introduced a new pattern for matching splat.d explicitly. Both splat.d and splati.d can now be generated from the @llvm.mips.splat.d intrinsic, depending on whether the element index is passed in a register (splat.d) or as an immediate value (splati.d). Differential Revision: https://reviews.llvm.org/D45683 llvm-svn: 331771
This commit is contained in:
parent
2864b46469
commit
c7113cc9e4
|
@ -181,8 +181,28 @@ def vsplati16 : PatFrag<(ops node:$e0),
|
||||||
def vsplati32 : PatFrag<(ops node:$e0),
|
def vsplati32 : PatFrag<(ops node:$e0),
|
||||||
(v4i32 (build_vector node:$e0, node:$e0,
|
(v4i32 (build_vector node:$e0, node:$e0,
|
||||||
node:$e0, node:$e0))>;
|
node:$e0, node:$e0))>;
|
||||||
|
|
||||||
|
def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
|
||||||
|
APInt Imm;
|
||||||
|
SDNode *BV = N->getOperand(0).getNode();
|
||||||
|
EVT EltTy = N->getValueType(0).getVectorElementType();
|
||||||
|
|
||||||
|
return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
|
||||||
|
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
|
||||||
|
}]>;
|
||||||
|
|
||||||
def vsplati64 : PatFrag<(ops node:$e0),
|
def vsplati64 : PatFrag<(ops node:$e0),
|
||||||
(v2i64 (build_vector node:$e0, node:$e0))>;
|
(v2i64 (build_vector node:$e0, node:$e0))>;
|
||||||
|
|
||||||
|
def vsplati64_splat_d : PatFrag<(ops node:$e0),
|
||||||
|
(v2i64 (bitconvert
|
||||||
|
(v4i32 (and
|
||||||
|
(v4i32 (build_vector node:$e0,
|
||||||
|
node:$e0,
|
||||||
|
node:$e0,
|
||||||
|
node:$e0)),
|
||||||
|
vsplati64_imm_eq_1))))>;
|
||||||
|
|
||||||
def vsplatf32 : PatFrag<(ops node:$e0),
|
def vsplatf32 : PatFrag<(ops node:$e0),
|
||||||
(v4f32 (build_vector node:$e0, node:$e0,
|
(v4f32 (build_vector node:$e0, node:$e0,
|
||||||
node:$e0, node:$e0))>;
|
node:$e0, node:$e0))>;
|
||||||
|
@ -196,7 +216,8 @@ def vsplati16_elt : PatFrag<(ops node:$v, node:$i),
|
||||||
def vsplati32_elt : PatFrag<(ops node:$v, node:$i),
|
def vsplati32_elt : PatFrag<(ops node:$v, node:$i),
|
||||||
(MipsVSHF (vsplati32 node:$i), node:$v, node:$v)>;
|
(MipsVSHF (vsplati32 node:$i), node:$v, node:$v)>;
|
||||||
def vsplati64_elt : PatFrag<(ops node:$v, node:$i),
|
def vsplati64_elt : PatFrag<(ops node:$v, node:$i),
|
||||||
(MipsVSHF (vsplati64 node:$i), node:$v, node:$v)>;
|
(MipsVSHF (vsplati64_splat_d node:$i),
|
||||||
|
node:$v, node:$v)>;
|
||||||
|
|
||||||
class SplatPatLeaf<Operand opclass, dag frag, code pred = [{}],
|
class SplatPatLeaf<Operand opclass, dag frag, code pred = [{}],
|
||||||
SDNodeXForm xform = NOOP_SDNodeXForm>
|
SDNodeXForm xform = NOOP_SDNodeXForm>
|
||||||
|
@ -327,15 +348,6 @@ def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
|
||||||
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
|
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
|
|
||||||
APInt Imm;
|
|
||||||
SDNode *BV = N->getOperand(0).getNode();
|
|
||||||
EVT EltTy = N->getValueType(0).getVectorElementType();
|
|
||||||
|
|
||||||
return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
|
|
||||||
Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
|
|
||||||
}]>;
|
|
||||||
|
|
||||||
def vbclr_b : PatFrag<(ops node:$ws, node:$wt),
|
def vbclr_b : PatFrag<(ops node:$ws, node:$wt),
|
||||||
(and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
|
(and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
|
||||||
immAllOnesV))>;
|
immAllOnesV))>;
|
||||||
|
|
|
@ -1343,7 +1343,16 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
|
||||||
SDValue LaneB;
|
SDValue LaneB;
|
||||||
|
|
||||||
if (ResVecTy == MVT::v2i64) {
|
if (ResVecTy == MVT::v2i64) {
|
||||||
|
// If the index is passed as an immediate value, set the upper
|
||||||
|
// lane to 0 so that the splati.d instruction can be matched.
|
||||||
|
if (isa<ConstantSDNode>(LaneA))
|
||||||
LaneB = DAG.getConstant(0, DL, MVT::i32);
|
LaneB = DAG.getConstant(0, DL, MVT::i32);
|
||||||
|
// If the index is passed in a register, set the upper lane to the same
|
||||||
|
// value as the lower - this results in the BUILD_VECTOR node not being
|
||||||
|
// expanded through the stack. This way we are able to pattern match the set of
|
||||||
|
// nodes created here to splat.d.
|
||||||
|
else
|
||||||
|
LaneB = LaneA;
|
||||||
ViaVecTy = MVT::v4i32;
|
ViaVecTy = MVT::v4i32;
|
||||||
if(BigEndian)
|
if(BigEndian)
|
||||||
std::swap(LaneA, LaneB);
|
std::swap(LaneA, LaneB);
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
; Test the MSA splat intrinsics that are encoded with the 3R instruction
|
; Test the MSA splat intrinsics that are encoded with the 3R instruction
|
||||||
; format.
|
; format.
|
||||||
|
|
||||||
; RUN: llc -march=mips -mattr=+msa,+fp64 -relocation-model=pic < %s | \
|
; RUN: llc -march=mips -mcpu=mips32r5 -mattr=+msa,+fp64 -relocation-model=pic < %s | \
|
||||||
; RUN: FileCheck -check-prefix=MIPS32 %s
|
; RUN: FileCheck -check-prefix=MIPS32 %s
|
||||||
; RUN: llc -march=mipsel -mattr=+msa,+fp64 -relocation-model=pic < %s | \
|
; RUN: llc -march=mipsel -mcpu=mips32r5 -mattr=+msa,+fp64 -relocation-model=pic < %s | \
|
||||||
; RUN: FileCheck -check-prefix=MIPS32 %s
|
; RUN: FileCheck -check-prefix=MIPS32 %s
|
||||||
|
|
||||||
@llvm_mips_splat_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
|
@llvm_mips_splat_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
|
||||||
|
@ -83,14 +83,11 @@ entry:
|
||||||
declare <2 x i64> @llvm.mips.splat.d(<2 x i64>, i32) nounwind
|
declare <2 x i64> @llvm.mips.splat.d(<2 x i64>, i32) nounwind
|
||||||
|
|
||||||
; MIPS32: llvm_mips_splat_d_test:
|
; MIPS32: llvm_mips_splat_d_test:
|
||||||
; FIXME: This test is currently disabled for MIPS32 because the indices are
|
; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_d_ARG1)(
|
||||||
; difficult to match. This is because 64-bit values cannot be stored in
|
; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_d_RES)(
|
||||||
; GPR32.
|
; MIPS32-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
|
||||||
; MIPS64-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_splat_d_ARG1)(
|
; MIPS32-DAG: splat.d [[R4:\$w[0-9]+]], [[R3]][$4]
|
||||||
; MIPS64-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_d_RES)(
|
; MIPS32-DAG: st.d [[R4]], 0([[R2]])
|
||||||
; MIPS64-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
|
|
||||||
; MIPS64-DAG: splat.d [[R4:\$w[0-9]+]], [[R3]][$4]
|
|
||||||
; MIPS64-DAG: st.d [[R4]], 0([[R2]])
|
|
||||||
; MIPS32: .size llvm_mips_splat_d_test
|
; MIPS32: .size llvm_mips_splat_d_test
|
||||||
|
|
||||||
define void @llvm_mips_splat_d_arg_test(i32 %arg) {
|
define void @llvm_mips_splat_d_arg_test(i32 %arg) {
|
||||||
|
@ -99,10 +96,14 @@ entry:
|
||||||
store volatile <2 x i64> %0, <2 x i64>* @llvm_mips_splat_d_RES
|
store volatile <2 x i64> %0, <2 x i64>* @llvm_mips_splat_d_RES
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
; CHECK-LABEL: llvm_mips_splat_d_arg_test
|
; MIPS32-LABEL: llvm_mips_splat_d_arg_test
|
||||||
; CHECK: ldi.w [[R1:\$w[0-9]+]], 1
|
; MIPS32-DAG: lw [[R0:\$[0-9]+]], %got(
|
||||||
; CHECK: and.v [[R2:\$w[0-9]+]], {{\$w[0-9]+}}, [[R1]]
|
; MIPS32-DAG: addiu [[R1:\$[0-9]+]], [[R0]], %lo(
|
||||||
; CHECK: vshf.d [[R2]], {{.*}}
|
; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_d_RES)(
|
||||||
|
; MIPS32-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
|
||||||
|
; MIPS32-DAG: splat.d [[R4:\$w[0-9]+]], [[R3]][$4]
|
||||||
|
; MIPS32-DAG: st.d [[R4]], 0([[R2]])
|
||||||
|
; MIPS32-NOT: vshf.d
|
||||||
|
|
||||||
define void @llvm_mips_splat_d_imm_test() {
|
define void @llvm_mips_splat_d_imm_test() {
|
||||||
entry:
|
entry:
|
||||||
|
@ -110,6 +111,11 @@ entry:
|
||||||
store volatile<2 x i64> %0, <2 x i64>* @llvm_mips_splat_d_RES
|
store volatile<2 x i64> %0, <2 x i64>* @llvm_mips_splat_d_RES
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
; CHECK-LABEL: llvm_mips_splat_d_imm_test
|
; MIPS32-LABEL: llvm_mips_splat_d_imm_test
|
||||||
; CHECK: splati. d {{.*}}, {{.*}}[0]
|
; MIPS32-DAG: lw [[R0:\$[0-9]+]], %got(
|
||||||
; CHECK-NOT: vshf.d
|
; MIPS32-DAG: addiu [[R1:\$[0-9]+]], [[R0]], %lo(
|
||||||
|
; MIPS32-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_splat_d_RES)(
|
||||||
|
; MIPS32-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
|
||||||
|
; MIPS32-DAG: splati.d [[R4:\$w[0-9]+]], [[R3]][0]
|
||||||
|
; MIPS32-DAG: st.d [[R4]], 0([[R2]])
|
||||||
|
; MIPS32-NOT: vshf.d
|
||||||
|
|
Loading…
Reference in New Issue