[X86][XOP] Merge rotation opcodes with AVX512 equivalents. NFCI.

The XOP rotations act as ROTL for positive amounts and as ROTR for negative amounts, which means that we can treat them all as ROTL with an unsigned modulo amount. We already check that we only try to lower ISD::ROTL for XOP rotations.

Differential Revision: https://reviews.llvm.org/D37949

llvm-svn: 314207
This commit is contained in:
Simon Pilgrim 2017-09-26 14:12:50 +00:00
parent 1d04b5bacf
commit dac6fd4170
5 changed files with 19 additions and 26 deletions

View File

@ -22667,7 +22667,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
assert((Opcode == ISD::ROTL) && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
// Split 256-bit integers.
if (VT.is256BitVector())
@ -22680,13 +22680,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
assert(RotateAmt < EltSizeInBits && "Rotation out of range");
return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
}
}
// Use general rotate by variable (per-element).
return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt);
return Op;
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
@ -24610,8 +24610,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::RDSEED: return "X86ISD::RDSEED";
case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
case X86ISD::VPROT: return "X86ISD::VPROT";
case X86ISD::VPROTI: return "X86ISD::VPROTI";
case X86ISD::VPSHA: return "X86ISD::VPSHA";
case X86ISD::VPSHL: return "X86ISD::VPSHL";
case X86ISD::VPCOM: return "X86ISD::VPCOM";

View File

@ -447,8 +447,6 @@ namespace llvm {
/// SSE4A Extraction and Insertion.
EXTRQI, INSERTQI,
// XOP variable/immediate rotations.
VPROT, VPROTI,
// XOP arithmetic/logical shifts.
VPSHA, VPSHL,
// XOP signed/unsigned integer comparisons.

View File

@ -213,9 +213,6 @@ def X86kshiftr : SDNode<"X86ISD::KSHIFTR",
def X86vrotli : SDNode<"X86ISD::VROTLI", X86vshiftimm>;
def X86vrotri : SDNode<"X86ISD::VROTRI", X86vshiftimm>;
def X86vprot : SDNode<"X86ISD::VPROT", X86vshiftvariable>;
def X86vproti : SDNode<"X86ISD::VPROTI", X86vshiftimm>;
def X86vpshl : SDNode<"X86ISD::VPSHL", X86vshiftvariable>;
def X86vpsha : SDNode<"X86ISD::VPSHA", X86vshiftvariable>;

View File

@ -115,10 +115,10 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3op<0x90, "vprotb", X86vprot, v16i8>;
defm VPROTD : xop3op<0x92, "vprotd", X86vprot, v4i32>;
defm VPROTQ : xop3op<0x93, "vprotq", X86vprot, v2i64>;
defm VPROTW : xop3op<0x91, "vprotw", X86vprot, v8i16>;
defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8>;
defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32>;
defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64>;
defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16>;
defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8>;
defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32>;
defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64>;
@ -144,10 +144,10 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3opimm<0xC0, "vprotb", X86vproti, v16i8>;
defm VPROTD : xop3opimm<0xC2, "vprotd", X86vproti, v4i32>;
defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vproti, v2i64>;
defm VPROTW : xop3opimm<0xC1, "vprotw", X86vproti, v8i16>;
defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8>;
defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32>;
defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64>;
defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16>;
}
// Instruction where second source can be memory, but third must be register

View File

@ -1597,14 +1597,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0),
X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, X86ISD::VPROT, 0),
X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, X86ISD::VPROT, 0),
X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, ISD::ROTL, 0),
X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, ISD::ROTL, 0),
X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, ISD::ROTL, 0),
X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, ISD::ROTL, 0),
X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshaq, INTR_TYPE_2OP, X86ISD::VPSHA, 0),