From 783490095000f20a61f18918132eeea7fd15eb80 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 Jan 2012 06:43:11 +0000 Subject: [PATCH] Custom lower PSIGN and PSHUFB intrinsics to their corresponding target specific nodes so we can remove the isel patterns. llvm-svn: 148933 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 +++ llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 24 +++--- llvm/lib/Target/X86/X86InstrSSE.td | 85 ++++++-------------- 4 files changed, 49 insertions(+), 74 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 77d0e12381c8..74b02ad3d985 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9342,6 +9342,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_pcmpgt_q: return DAG.getNode(X86ISD::PCMPGT, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_avx2_pshuf_b: + return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: + case Intrinsic::x86_ssse3_psign_w_128: + case Intrinsic::x86_ssse3_psign_d_128: + case Intrinsic::x86_avx2_psign_b: + case Intrinsic::x86_avx2_psign_w: + case Intrinsic::x86_avx2_psign_d: + return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 005a1f0aacac..937b5d781c84 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -48,7 +48,7 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", - SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 4b6e26bd8ab0..230aae049faa 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -741,10 +741,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, - { X86::PSHUFBrr128, X86::PSHUFBrm128, TB_ALIGN_16 }, - { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, - { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, - { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, + { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 }, + { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 }, + { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 }, + { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 }, { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, @@ -927,10 +927,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, - { X86::VPSHUFBrr128, X86::VPSHUFBrm128, TB_ALIGN_16 }, - { X86::VPSIGNBrr128, X86::VPSIGNBrm128, TB_ALIGN_16 }, - { X86::VPSIGNWrr128, X86::VPSIGNWrm128, TB_ALIGN_16 }, - { X86::VPSIGNDrr128, X86::VPSIGNDrm128, TB_ALIGN_16 }, + { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 }, + { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 }, + { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 }, + { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 }, { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, @@ -1069,10 +1069,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 }, { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 }, { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 }, - { X86::VPSHUFBrr256, X86::VPSHUFBrm256, TB_ALIGN_32 }, - { X86::VPSIGNBrr256, X86::VPSIGNBrm256, TB_ALIGN_32 }, - { X86::VPSIGNWrr256, X86::VPSIGNWrm256, TB_ALIGN_32 }, - { X86::VPSIGNDrr256, X86::VPSIGNDrm256, TB_ALIGN_32 }, + { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 }, + { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 }, + { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 }, + { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 }, { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 }, { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 }, { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 }, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 51b960104779..44dbaa96462c 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5148,20 +5148,20 @@ let isCommutable = 0 in { memopv2i64, i128mem, 0>, VEX_4V; defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128, memopv2i64, i128mem, 0>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", - int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", - int_x86_ssse3_psign_b_128, 0>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", - int_x86_ssse3_psign_w_128, 0>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", - int_x86_ssse3_psign_d_128, 0>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; @@ -5177,20 +5177,20 @@ let isCommutable = 0 in { memopv4i64, i256mem, 0>, VEX_4V; defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256, memopv4i64, i256mem, 0>, VEX_4V; + defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", int_x86_avx2_pmadd_ub_sw>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", - int_x86_avx2_pshuf_b>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", - int_x86_avx2_psign_b>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", - int_x86_avx2_psign_w>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", - int_x86_avx2_psign_d>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", int_x86_avx2_pmul_hr_sw>, VEX_4V; @@ -5207,62 +5207,25 @@ let isCommutable = 0 in { memopv2i64, i128mem>; defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, memopv2i64, i128mem>; + defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128, + memopv2i64, i128mem>; + defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128, + memopv2i64, i128mem>; + defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128, + memopv2i64, i128mem>; + defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128, + memopv2i64, i128mem>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128>; defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", int_x86_ssse3_phsub_sw_128>; defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", int_x86_ssse3_pmadd_ub_sw_128>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", - int_x86_ssse3_pshuf_b_128>; - defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", - int_x86_ssse3_psign_b_128>; - defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", - int_x86_ssse3_psign_w_128>; - defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", - int_x86_ssse3_psign_d_128>; } defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw_128>; } -let Predicates = [HasAVX] in { - def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (VPSHUFBrr128 VR128:$src, VR128:$mask)>; - def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (VPSHUFBrm128 VR128:$src, addr:$mask)>; - - def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; -} - -let Predicates = [HasAVX2] in { - def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNBrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNDrr256 VR256:$src1, VR256:$src2)>; -} - -let Predicates = [HasSSSE3] in { - def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>; - def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>; - - def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNDrr128 VR128:$src1, VR128:$src2)>; -} - //===---------------------------------------------------------------------===// // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===//