Custom lower PSIGN and PSHUFB intrinsics to their corresponding target-specific nodes so we can remove the isel patterns.

llvm-svn: 148933
This commit is contained in:
Craig Topper 2012-01-25 06:43:11 +00:00
parent 80df922f2f
commit 7834900950
4 changed files with 49 additions and 74 deletions

View File

@ -9342,6 +9342,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx2_pcmpgt_q:
return DAG.getNode(X86ISD::PCMPGT, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
// pshufb intrinsics (SSSE3 128-bit form and the AVX2 form): instead of leaving
// the intrinsic call in the DAG, build an X86ISD::PSHUFB node. The node is
// created with the intrinsic's own result type and receives operands 1 and 2
// of the intrinsic node unchanged.
// NOTE(review): operand 0 is presumably the intrinsic ID, which is why the
// forwarded operands start at index 1 -- confirm against the switch header.
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
// psign intrinsics: the byte, word and dword variants of both the SSSE3
// 128-bit and the AVX2 forms all share this one lowering. Each becomes a
// single X86ISD::PSIGN node created with the intrinsic's own result type and
// given operands 1 and 2 of the intrinsic node unchanged.
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
case Intrinsic::x86_avx2_psign_b:
case Intrinsic::x86_avx2_psign_w:
case Intrinsic::x86_avx2_psign_d:
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
// ptest and testp intrinsics. The intrinsics these come from are designed to
// return an integer value, not just an instruction, so lower it to the ptest

View File

@ -48,7 +48,7 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,

View File

@ -741,10 +741,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
{ X86::PORrr, X86::PORrm, TB_ALIGN_16 },
{ X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
{ X86::PSHUFBrr128, X86::PSHUFBrm128, TB_ALIGN_16 },
{ X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 },
{ X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 },
{ X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 },
{ X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
{ X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
{ X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
{ X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
{ X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
{ X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
{ X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
@ -927,10 +927,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
{ X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
{ X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
{ X86::VPSHUFBrr128, X86::VPSHUFBrm128, TB_ALIGN_16 },
{ X86::VPSIGNBrr128, X86::VPSIGNBrm128, TB_ALIGN_16 },
{ X86::VPSIGNWrr128, X86::VPSIGNWrm128, TB_ALIGN_16 },
{ X86::VPSIGNDrr128, X86::VPSIGNDrm128, TB_ALIGN_16 },
{ X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
{ X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
{ X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
{ X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
{ X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
{ X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
{ X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
@ -1069,10 +1069,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
{ X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
{ X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
{ X86::VPSHUFBrr256, X86::VPSHUFBrm256, TB_ALIGN_32 },
{ X86::VPSIGNBrr256, X86::VPSIGNBrm256, TB_ALIGN_32 },
{ X86::VPSIGNWrr256, X86::VPSIGNWrm256, TB_ALIGN_32 },
{ X86::VPSIGNDrr256, X86::VPSIGNDrm256, TB_ALIGN_32 },
{ X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
{ X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
{ X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
{ X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
{ X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
{ X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
{ X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },

View File

@ -5148,20 +5148,20 @@ let isCommutable = 0 in {
memopv2i64, i128mem, 0>, VEX_4V;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb",
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
@ -5177,20 +5177,20 @@ let isCommutable = 0 in {
memopv4i64, i256mem, 0>, VEX_4V;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
memopv4i64, i256mem, 0>, VEX_4V;
defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
memopv4i64, i256mem, 0>, VEX_4V;
defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
memopv4i64, i256mem, 0>, VEX_4V;
defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
memopv4i64, i256mem, 0>, VEX_4V;
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
memopv4i64, i256mem, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb",
int_x86_avx2_pshuf_b>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb",
int_x86_avx2_psign_b>, VEX_4V;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw",
int_x86_avx2_psign_w>, VEX_4V;
defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd",
int_x86_avx2_psign_d>, VEX_4V;
}
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
int_x86_avx2_pmul_hr_sw>, VEX_4V;
@ -5207,62 +5207,25 @@ let isCommutable = 0 in {
memopv2i64, i128mem>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
memopv2i64, i128mem>;
defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
memopv2i64, i128mem>;
defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
memopv2i64, i128mem>;
defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
memopv2i64, i128mem>;
defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
memopv2i64, i128mem>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128>;
defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128>;
defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb",
int_x86_ssse3_pshuf_b_128>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb",
int_x86_ssse3_psign_b_128>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw",
int_x86_ssse3_psign_w_128>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd",
int_x86_ssse3_psign_d_128>;
}
defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw_128>;
}
// Selection patterns that apply only under the HasAVX predicate. They map
// X86pshufb and X86psign nodes on VR128 operands to the VPSHUFB*128 and
// VPSIGN*128 instructions.
let Predicates = [HasAVX] in {
// X86pshufb with both the source and the mask in registers.
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
(VPSHUFBrr128 VR128:$src, VR128:$mask)>;
// X86pshufb whose mask is a memopv2i64 load viewed through bc_v16i8: the rm
// form is selected and is given the mask's address instead of a register.
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(VPSHUFBrm128 VR128:$src, addr:$mask)>;
// X86psign, register-register only. The result type picks the instruction:
// v16i8 -> VPSIGNB, v8i16 -> VPSIGNW, v4i32 -> VPSIGND.
def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
}
// Selection patterns that apply only under the HasAVX2 predicate. They map
// X86psign nodes on VR256 operands to the VPSIGN*256 register-register
// instructions. The result type picks the instruction:
// v32i8 -> VPSIGNB, v16i16 -> VPSIGNW, v8i32 -> VPSIGND.
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)),
(VPSIGNBrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)),
(VPSIGNWrr256 VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)),
(VPSIGNDrr256 VR256:$src1, VR256:$src2)>;
}
// Selection patterns that apply only under the HasSSSE3 predicate. They map
// X86pshufb and X86psign nodes on VR128 operands to the PSHUFB*128 and
// PSIGN*128 instructions.
let Predicates = [HasSSSE3] in {
// X86pshufb with both the source and the mask in registers.
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
(PSHUFBrr128 VR128:$src, VR128:$mask)>;
// X86pshufb whose mask is a memopv2i64 load viewed through bc_v16i8: the rm
// form is selected and is given the mask's address instead of a register.
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>;
// X86psign, register-register only. The result type picks the instruction:
// v16i8 -> PSIGNB, v8i16 -> PSIGNW, v4i32 -> PSIGND.
def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNBrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNWrr128 VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNDrr128 VR128:$src1, VR128:$src2)>;
}
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//