Revert r248483, r242546, r242545, and r242409 - absdiff intrinsics
After much discussion, ending here: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151123/315620.html it has been decided that, instead of having the vectorizer directly generate special absdiff and horizontal-add intrinsics, we'll recognize the relevant reduction patterns during CodeGen. Accordingly, these intrinsics are not needed (the operations they represent can be pattern matched, as is already done in some backends). Thus, we're backing these out in favor of the current development work. r248483 - Codegen: Fix llvm.*absdiff semantic. r242546 - [ARM] Use [SU]ABSDIFF nodes instead of intrinsics for VABD/VABA r242545 - [AArch64] Use [SU]ABSDIFF nodes instead of intrinsics for ABD/ABA r242409 - [Codegen] Add intrinsics 'absdiff' and corresponding SDNodes for absolute difference operation llvm-svn: 255387
This commit is contained in:
parent
515f8df3f1
commit
cd8664c3c2
|
@ -11217,68 +11217,6 @@ Examples:
|
||||||
|
|
||||||
%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
|
%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
|
||||||
|
|
||||||
|
|
||||||
'``llvm.uabsdiff.*``' and '``llvm.sabsdiff.*``' Intrinsics
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
Syntax:
|
|
||||||
"""""""
|
|
||||||
This is an overloaded intrinsic. The operands are vectors of any integer bit width.
|
|
||||||
|
|
||||||
.. code-block:: llvm
|
|
||||||
|
|
||||||
declare <4 x integer> @llvm.uabsdiff.v4i32(<4 x integer> %a, <4 x integer> %b)
|
|
||||||
|
|
||||||
|
|
||||||
Overview:
|
|
||||||
"""""""""
|
|
||||||
|
|
||||||
The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference
|
|
||||||
of the two operands, treating them both as unsigned integers. The intermediate
|
|
||||||
calculations are computed using infinitely precise unsigned arithmetic. The final
|
|
||||||
result will be truncated to the given type.
|
|
||||||
|
|
||||||
The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of
|
|
||||||
the two operands, treating them both as signed integers. If the result overflows, the
|
|
||||||
behavior is undefined.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
These intrinsics are primarily used during the code generation stage of compilation.
|
|
||||||
They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
|
|
||||||
recommended for users to create them manually.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
""""""""""
|
|
||||||
|
|
||||||
Both intrinsics take two integer vectors of the same bitwidth.
|
|
||||||
|
|
||||||
Semantics:
|
|
||||||
""""""""""
|
|
||||||
|
|
||||||
The expression::
|
|
||||||
|
|
||||||
call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
|
|
||||||
|
|
||||||
is equivalent to::
|
|
||||||
|
|
||||||
%1 = zext <4 x i32> %a to <4 x i64>
|
|
||||||
%2 = zext <4 x i32> %b to <4 x i64>
|
|
||||||
%sub = sub <4 x i64> %1, %2
|
|
||||||
%trunc = trunc <4 x i64> %sub to <4 x i32>
|
|
||||||
|
|
||||||
and the expression::
|
|
||||||
|
|
||||||
call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
|
|
||||||
|
|
||||||
is equivalent to::
|
|
||||||
|
|
||||||
%sub = sub nsw <4 x i32> %a, %b
|
|
||||||
%ispos = icmp sge <4 x i32> %sub, zeroinitializer
|
|
||||||
%neg = sub nsw <4 x i32> zeroinitializer, %sub
|
|
||||||
%1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
|
|
||||||
|
|
||||||
|
|
||||||
Half Precision Floating Point Intrinsics
|
Half Precision Floating Point Intrinsics
|
||||||
----------------------------------------
|
----------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -338,10 +338,6 @@ namespace ISD {
|
||||||
/// Byte Swap and Counting operators.
|
/// Byte Swap and Counting operators.
|
||||||
BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
|
BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
|
||||||
|
|
||||||
/// [SU]ABSDIFF - Signed/Unsigned absolute difference of two input integer
|
|
||||||
/// vectors. These nodes are generated from llvm.*absdiff* intrinsics.
|
|
||||||
SABSDIFF, UABSDIFF,
|
|
||||||
|
|
||||||
/// Bit counting operators with an undefined result for zero inputs.
|
/// Bit counting operators with an undefined result for zero inputs.
|
||||||
CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
|
CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
|
||||||
|
|
||||||
|
|
|
@ -631,12 +631,6 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
|
||||||
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
|
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
|
||||||
[], "llvm.clear_cache">;
|
[], "llvm.clear_cache">;
|
||||||
|
|
||||||
// Calculate the Absolute Differences of the two input vectors.
|
|
||||||
def int_sabsdiff : Intrinsic<[llvm_anyvector_ty],
|
|
||||||
[ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
|
|
||||||
def int_uabsdiff : Intrinsic<[llvm_anyvector_ty],
|
|
||||||
[ LLVMMatchType<0>, LLVMMatchType<0> ], [IntrNoMem]>;
|
|
||||||
|
|
||||||
//===-------------------------- Masked Intrinsics -------------------------===//
|
//===-------------------------- Masked Intrinsics -------------------------===//
|
||||||
//
|
//
|
||||||
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
|
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
|
||||||
|
|
|
@ -396,8 +396,6 @@ def smax : SDNode<"ISD::SMAX" , SDTIntBinOp>;
|
||||||
def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>;
|
def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>;
|
||||||
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>;
|
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>;
|
||||||
|
|
||||||
def sabsdiff : SDNode<"ISD::SABSDIFF" , SDTIntBinOp>;
|
|
||||||
def uabsdiff : SDNode<"ISD::UABSDIFF" , SDTIntBinOp>;
|
|
||||||
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
|
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
|
||||||
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
|
def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
|
||||||
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
|
def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
|
||||||
|
|
|
@ -147,10 +147,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
||||||
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
|
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
|
||||||
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
|
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
|
||||||
break;
|
break;
|
||||||
case ISD::UABSDIFF:
|
|
||||||
case ISD::SABSDIFF:
|
|
||||||
Res = PromoteIntRes_SimpleIntBinOp(N);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the result is null then the sub-method took care of registering it.
|
// If the result is null then the sub-method took care of registering it.
|
||||||
|
|
|
@ -105,7 +105,6 @@ class VectorLegalizer {
|
||||||
SDValue ExpandLoad(SDValue Op);
|
SDValue ExpandLoad(SDValue Op);
|
||||||
SDValue ExpandStore(SDValue Op);
|
SDValue ExpandStore(SDValue Op);
|
||||||
SDValue ExpandFNEG(SDValue Op);
|
SDValue ExpandFNEG(SDValue Op);
|
||||||
SDValue ExpandABSDIFF(SDValue Op);
|
|
||||||
|
|
||||||
/// \brief Implements vector promotion.
|
/// \brief Implements vector promotion.
|
||||||
///
|
///
|
||||||
|
@ -330,8 +329,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||||
case ISD::SMAX:
|
case ISD::SMAX:
|
||||||
case ISD::UMIN:
|
case ISD::UMIN:
|
||||||
case ISD::UMAX:
|
case ISD::UMAX:
|
||||||
case ISD::UABSDIFF:
|
|
||||||
case ISD::SABSDIFF:
|
|
||||||
QueryType = Node->getValueType(0);
|
QueryType = Node->getValueType(0);
|
||||||
break;
|
break;
|
||||||
case ISD::FP_ROUND_INREG:
|
case ISD::FP_ROUND_INREG:
|
||||||
|
@ -718,42 +715,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
|
||||||
return ExpandFNEG(Op);
|
return ExpandFNEG(Op);
|
||||||
case ISD::SETCC:
|
case ISD::SETCC:
|
||||||
return UnrollVSETCC(Op);
|
return UnrollVSETCC(Op);
|
||||||
case ISD::UABSDIFF:
|
|
||||||
case ISD::SABSDIFF:
|
|
||||||
return ExpandABSDIFF(Op);
|
|
||||||
default:
|
default:
|
||||||
return DAG.UnrollVectorOp(Op.getNode());
|
return DAG.UnrollVectorOp(Op.getNode());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
|
|
||||||
SDLoc dl(Op);
|
|
||||||
SDValue Op0 = Op.getOperand(0);
|
|
||||||
SDValue Op1 = Op.getOperand(1);
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
|
|
||||||
// For unsigned intrinsic, promote the type to handle unsigned overflow.
|
|
||||||
bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF);
|
|
||||||
if (isUabsdiff) {
|
|
||||||
VT = VT.widenIntegerVectorElementType(*DAG.getContext());
|
|
||||||
Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0);
|
|
||||||
Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1);
|
|
||||||
}
|
|
||||||
|
|
||||||
SDNodeFlags Flags;
|
|
||||||
Flags.setNoSignedWrap(!isUabsdiff);
|
|
||||||
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags);
|
|
||||||
if (isUabsdiff)
|
|
||||||
return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub);
|
|
||||||
|
|
||||||
SDValue Cmp =
|
|
||||||
DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(),
|
|
||||||
*DAG.getContext(), VT),
|
|
||||||
Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE));
|
|
||||||
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags);
|
|
||||||
return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg);
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
|
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
|
||||||
// Lower a select instruction where the condition is a scalar and the
|
// Lower a select instruction where the condition is a scalar and the
|
||||||
// operands are vectors. Lower this select to VSELECT and implement it
|
// operands are vectors. Lower this select to VSELECT and implement it
|
||||||
|
|
|
@ -684,8 +684,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
||||||
case ISD::SMAX:
|
case ISD::SMAX:
|
||||||
case ISD::UMIN:
|
case ISD::UMIN:
|
||||||
case ISD::UMAX:
|
case ISD::UMAX:
|
||||||
case ISD::UABSDIFF:
|
|
||||||
case ISD::SABSDIFF:
|
|
||||||
SplitVecRes_BinOp(N, Lo, Hi);
|
SplitVecRes_BinOp(N, Lo, Hi);
|
||||||
break;
|
break;
|
||||||
case ISD::FMA:
|
case ISD::FMA:
|
||||||
|
|
|
@ -4880,18 +4880,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
||||||
getValue(I.getArgOperand(0)).getValueType(),
|
getValue(I.getArgOperand(0)).getValueType(),
|
||||||
getValue(I.getArgOperand(0))));
|
getValue(I.getArgOperand(0))));
|
||||||
return nullptr;
|
return nullptr;
|
||||||
case Intrinsic::uabsdiff:
|
|
||||||
setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl,
|
|
||||||
getValue(I.getArgOperand(0)).getValueType(),
|
|
||||||
getValue(I.getArgOperand(0)),
|
|
||||||
getValue(I.getArgOperand(1))));
|
|
||||||
return nullptr;
|
|
||||||
case Intrinsic::sabsdiff:
|
|
||||||
setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl,
|
|
||||||
getValue(I.getArgOperand(0)).getValueType(),
|
|
||||||
getValue(I.getArgOperand(0)),
|
|
||||||
getValue(I.getArgOperand(1))));
|
|
||||||
return nullptr;
|
|
||||||
case Intrinsic::cttz: {
|
case Intrinsic::cttz: {
|
||||||
SDValue Arg = getValue(I.getArgOperand(0));
|
SDValue Arg = getValue(I.getArgOperand(0));
|
||||||
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
|
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
|
||||||
|
|
|
@ -235,8 +235,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||||
case ISD::SHL_PARTS: return "shl_parts";
|
case ISD::SHL_PARTS: return "shl_parts";
|
||||||
case ISD::SRA_PARTS: return "sra_parts";
|
case ISD::SRA_PARTS: return "sra_parts";
|
||||||
case ISD::SRL_PARTS: return "srl_parts";
|
case ISD::SRL_PARTS: return "srl_parts";
|
||||||
case ISD::UABSDIFF: return "uabsdiff";
|
|
||||||
case ISD::SABSDIFF: return "sabsdiff";
|
|
||||||
|
|
||||||
// Conversion operators.
|
// Conversion operators.
|
||||||
case ISD::SIGN_EXTEND: return "sign_extend";
|
case ISD::SIGN_EXTEND: return "sign_extend";
|
||||||
|
|
|
@ -826,8 +826,7 @@ void TargetLoweringBase::initActions() {
|
||||||
setOperationAction(ISD::USUBO, VT, Expand);
|
setOperationAction(ISD::USUBO, VT, Expand);
|
||||||
setOperationAction(ISD::SMULO, VT, Expand);
|
setOperationAction(ISD::SMULO, VT, Expand);
|
||||||
setOperationAction(ISD::UMULO, VT, Expand);
|
setOperationAction(ISD::UMULO, VT, Expand);
|
||||||
setOperationAction(ISD::UABSDIFF, VT, Expand);
|
|
||||||
setOperationAction(ISD::SABSDIFF, VT, Expand);
|
|
||||||
setOperationAction(ISD::BITREVERSE, VT, Expand);
|
setOperationAction(ISD::BITREVERSE, VT, Expand);
|
||||||
|
|
||||||
// These library functions default to expand.
|
// These library functions default to expand.
|
||||||
|
|
|
@ -691,12 +691,10 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
|
||||||
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
|
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
|
||||||
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
|
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
|
||||||
|
|
||||||
// [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
|
// [SU][MIN|MAX] are available for all NEON types apart from i64.
|
||||||
// i64.
|
|
||||||
if (!VT.isFloatingPoint() &&
|
if (!VT.isFloatingPoint() &&
|
||||||
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
|
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
|
||||||
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
|
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
|
||||||
ISD::SABSDIFF, ISD::UABSDIFF})
|
|
||||||
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
|
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
|
||||||
|
|
||||||
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
|
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
|
||||||
|
@ -8251,15 +8249,14 @@ static SDValue performAddSubLongCombine(SDNode *N,
|
||||||
// (aarch64_neon_umull (extract_high (v2i64 vec)))
|
// (aarch64_neon_umull (extract_high (v2i64 vec)))
|
||||||
// (extract_high (v2i64 (dup128 scalar)))))
|
// (extract_high (v2i64 (dup128 scalar)))))
|
||||||
//
|
//
|
||||||
static SDValue tryCombineLongOpWithDup(SDNode *N,
|
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
|
||||||
TargetLowering::DAGCombinerInfo &DCI,
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
if (DCI.isBeforeLegalizeOps())
|
if (DCI.isBeforeLegalizeOps())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
|
SDValue LHS = N->getOperand(1);
|
||||||
SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
|
SDValue RHS = N->getOperand(2);
|
||||||
SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
|
|
||||||
assert(LHS.getValueType().is64BitVector() &&
|
assert(LHS.getValueType().is64BitVector() &&
|
||||||
RHS.getValueType().is64BitVector() &&
|
RHS.getValueType().is64BitVector() &&
|
||||||
"unexpected shape for long operation");
|
"unexpected shape for long operation");
|
||||||
|
@ -8277,13 +8274,8 @@ static SDValue tryCombineLongOpWithDup(SDNode *N,
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
// N could either be an intrinsic or a sabsdiff/uabsdiff node.
|
|
||||||
if (IsIntrinsic)
|
|
||||||
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
|
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
|
||||||
N->getOperand(0), LHS, RHS);
|
N->getOperand(0), LHS, RHS);
|
||||||
else
|
|
||||||
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
|
|
||||||
LHS, RHS);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
|
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
|
||||||
|
@ -8401,12 +8393,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
|
||||||
case Intrinsic::aarch64_neon_fmin:
|
case Intrinsic::aarch64_neon_fmin:
|
||||||
return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
|
return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
|
||||||
N->getOperand(1), N->getOperand(2));
|
N->getOperand(1), N->getOperand(2));
|
||||||
case Intrinsic::aarch64_neon_sabd:
|
|
||||||
return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
|
|
||||||
N->getOperand(1), N->getOperand(2));
|
|
||||||
case Intrinsic::aarch64_neon_uabd:
|
|
||||||
return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
|
|
||||||
N->getOperand(1), N->getOperand(2));
|
|
||||||
case Intrinsic::aarch64_neon_fmaxnm:
|
case Intrinsic::aarch64_neon_fmaxnm:
|
||||||
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
|
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
|
||||||
N->getOperand(1), N->getOperand(2));
|
N->getOperand(1), N->getOperand(2));
|
||||||
|
@ -8417,7 +8403,7 @@ static SDValue performIntrinsicCombine(SDNode *N,
|
||||||
case Intrinsic::aarch64_neon_umull:
|
case Intrinsic::aarch64_neon_umull:
|
||||||
case Intrinsic::aarch64_neon_pmull:
|
case Intrinsic::aarch64_neon_pmull:
|
||||||
case Intrinsic::aarch64_neon_sqdmull:
|
case Intrinsic::aarch64_neon_sqdmull:
|
||||||
return tryCombineLongOpWithDup(N, DCI, DAG);
|
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
|
||||||
case Intrinsic::aarch64_neon_sqshl:
|
case Intrinsic::aarch64_neon_sqshl:
|
||||||
case Intrinsic::aarch64_neon_uqshl:
|
case Intrinsic::aarch64_neon_uqshl:
|
||||||
case Intrinsic::aarch64_neon_sqshlu:
|
case Intrinsic::aarch64_neon_sqshlu:
|
||||||
|
@ -8442,16 +8428,19 @@ static SDValue performExtendCombine(SDNode *N,
|
||||||
// helps the backend to decide that an sabdl2 would be useful, saving a real
|
// helps the backend to decide that an sabdl2 would be useful, saving a real
|
||||||
// extract_high operation.
|
// extract_high operation.
|
||||||
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
|
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
|
||||||
(N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
|
N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
|
||||||
N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
|
|
||||||
SDNode *ABDNode = N->getOperand(0).getNode();
|
SDNode *ABDNode = N->getOperand(0).getNode();
|
||||||
SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
|
unsigned IID = getIntrinsicID(ABDNode);
|
||||||
|
if (IID == Intrinsic::aarch64_neon_sabd ||
|
||||||
|
IID == Intrinsic::aarch64_neon_uabd) {
|
||||||
|
SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
|
||||||
if (!NewABD.getNode())
|
if (!NewABD.getNode())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
|
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
|
||||||
NewABD);
|
NewABD);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// This is effectively a custom type legalization for AArch64.
|
// This is effectively a custom type legalization for AArch64.
|
||||||
//
|
//
|
||||||
|
|
|
@ -2633,7 +2633,7 @@ defm FMOV : FPMoveImmediate<"fmov">;
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
|
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
|
||||||
uabsdiff>;
|
int_aarch64_neon_uabd>;
|
||||||
// Match UABDL in log2-shuffle patterns.
|
// Match UABDL in log2-shuffle patterns.
|
||||||
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
|
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
|
||||||
(v8i16 (add (sub (zext (v8i8 V64:$opA)),
|
(v8i16 (add (sub (zext (v8i8 V64:$opA)),
|
||||||
|
@ -2905,8 +2905,8 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
|
||||||
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
|
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
|
||||||
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
|
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
|
||||||
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
|
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
|
||||||
TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
|
TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
|
||||||
defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
|
defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
|
||||||
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
|
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
|
||||||
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
|
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
|
||||||
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
|
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
|
||||||
|
@ -2924,8 +2924,8 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
|
||||||
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
|
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
|
||||||
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
|
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
|
||||||
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
|
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
|
||||||
TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
|
TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
|
||||||
defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
|
defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
|
||||||
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
|
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
|
||||||
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
|
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
|
||||||
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
|
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
|
||||||
|
@ -3427,9 +3427,9 @@ defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn
|
||||||
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
|
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
|
||||||
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
|
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
|
||||||
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
|
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
|
||||||
sabsdiff>;
|
int_aarch64_neon_sabd>;
|
||||||
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
|
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
|
||||||
sabsdiff>;
|
int_aarch64_neon_sabd>;
|
||||||
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
|
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
|
||||||
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
|
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
|
||||||
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
|
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
|
||||||
|
@ -3450,7 +3450,7 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
|
||||||
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
|
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
|
||||||
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
|
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
|
||||||
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
|
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
|
||||||
uabsdiff>;
|
int_aarch64_neon_uabd>;
|
||||||
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
|
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
|
||||||
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
|
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
|
||||||
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
|
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
|
||||||
|
|
|
@ -143,15 +143,10 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
|
||||||
setOperationAction(ISD::UREM, VT, Expand);
|
setOperationAction(ISD::UREM, VT, Expand);
|
||||||
setOperationAction(ISD::FREM, VT, Expand);
|
setOperationAction(ISD::FREM, VT, Expand);
|
||||||
|
|
||||||
if (VT.isInteger()) {
|
|
||||||
setOperationAction(ISD::SABSDIFF, VT, Legal);
|
|
||||||
setOperationAction(ISD::UABSDIFF, VT, Legal);
|
|
||||||
}
|
|
||||||
if (!VT.isFloatingPoint() &&
|
if (!VT.isFloatingPoint() &&
|
||||||
VT != MVT::v2i64 && VT != MVT::v1i64)
|
VT != MVT::v2i64 && VT != MVT::v1i64)
|
||||||
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
|
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
|
||||||
setOperationAction(Opcode, VT, Legal);
|
setOperationAction(Opcode, VT, Legal);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
|
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
|
||||||
|
@ -10148,15 +10143,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
|
||||||
// Don't do anything for most intrinsics.
|
// Don't do anything for most intrinsics.
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vabds:
|
|
||||||
if (!N->getValueType(0).isInteger())
|
|
||||||
return SDValue();
|
|
||||||
return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
|
|
||||||
N->getOperand(1), N->getOperand(2));
|
|
||||||
case Intrinsic::arm_neon_vabdu:
|
|
||||||
return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
|
|
||||||
N->getOperand(1), N->getOperand(2));
|
|
||||||
|
|
||||||
// Vector shifts: check for immediate versions and lower them.
|
// Vector shifts: check for immediate versions and lower them.
|
||||||
// Note: This is done during DAG combining instead of DAG legalizing because
|
// Note: This is done during DAG combining instead of DAG legalizing because
|
||||||
// the build_vectors for 64-bit vector element shift counts are generally
|
// the build_vectors for 64-bit vector element shift counts are generally
|
||||||
|
|
|
@ -4994,10 +4994,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
|
||||||
// VABD : Vector Absolute Difference
|
// VABD : Vector Absolute Difference
|
||||||
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
|
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
|
||||||
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
|
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
|
||||||
"vabd", "s", sabsdiff, 1>;
|
"vabd", "s", int_arm_neon_vabds, 1>;
|
||||||
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
|
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
|
||||||
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
|
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
|
||||||
"vabd", "u", uabsdiff, 1>;
|
"vabd", "u", int_arm_neon_vabdu, 1>;
|
||||||
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
|
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
|
||||||
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
|
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
|
||||||
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
|
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
|
||||||
|
@ -5005,9 +5005,9 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
|
||||||
|
|
||||||
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
|
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
|
||||||
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
|
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
|
||||||
"vabdl", "s", sabsdiff, zext, 1>;
|
"vabdl", "s", int_arm_neon_vabds, zext, 1>;
|
||||||
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
|
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
|
||||||
"vabdl", "u", uabsdiff, zext, 1>;
|
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
|
||||||
|
|
||||||
def abd_shr :
|
def abd_shr :
|
||||||
PatFrag<(ops node:$in1, node:$in2, node:$shift),
|
PatFrag<(ops node:$in1, node:$in2, node:$shift),
|
||||||
|
@ -5034,15 +5034,15 @@ def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$
|
||||||
|
|
||||||
// VABA : Vector Absolute Difference and Accumulate
|
// VABA : Vector Absolute Difference and Accumulate
|
||||||
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
|
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
|
||||||
"vaba", "s", sabsdiff, add>;
|
"vaba", "s", int_arm_neon_vabds, add>;
|
||||||
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
|
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
|
||||||
"vaba", "u", uabsdiff, add>;
|
"vaba", "u", int_arm_neon_vabdu, add>;
|
||||||
|
|
||||||
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
|
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
|
||||||
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
|
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
|
||||||
"vabal", "s", sabsdiff, zext, add>;
|
"vabal", "s", int_arm_neon_vabds, zext, add>;
|
||||||
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
|
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
|
||||||
"vabal", "u", uabsdiff, zext, add>;
|
"vabal", "u", int_arm_neon_vabdu, zext, add>;
|
||||||
|
|
||||||
// Vector Maximum and Minimum.
|
// Vector Maximum and Minimum.
|
||||||
|
|
||||||
|
|
|
@ -1,181 +0,0 @@
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
|
||||||
|
|
||||||
declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
|
|
||||||
|
|
||||||
define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
|
|
||||||
; CHECK-LABEL: test_uabsdiff_v4i8_expand
|
|
||||||
; CHECK: pshufd
|
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: subl
|
|
||||||
; CHECK: punpckldq
|
|
||||||
; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
|
|
||||||
; CHECK-DAG: movd %xmm0, [[DST:%.*]]
|
|
||||||
; CHECK: subl [[SRC]], [[DST]]
|
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: pshufd
|
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: punpckldq
|
|
||||||
; CHECK: movdqa
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
|
|
||||||
ret <4 x i8> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
|
|
||||||
|
|
||||||
define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
|
|
||||||
; CHECK-LABEL: test_sabsdiff_v4i8_expand
|
|
||||||
; CHECK: psubd
|
|
||||||
; CHECK: pcmpgtd
|
|
||||||
; CHECK: pcmpeqd
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
|
||||||
; CHECK: por [[SRC2]], [[DST]]
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
|
|
||||||
ret <4 x i8> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
|
|
||||||
|
|
||||||
define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
|
|
||||||
; CHECK-LABEL: test_sabsdiff_v8i8_expand
|
|
||||||
; CHECK: psubw
|
|
||||||
; CHECK: pcmpgtw
|
|
||||||
; CHECK: pcmpeqd
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
|
||||||
; CHECK: por [[SRC2]], [[DST]]
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
|
|
||||||
ret <8 x i8> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
|
|
||||||
|
|
||||||
define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
|
|
||||||
; CHECK-LABEL: test_uabsdiff_v16i8_expand
|
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: movzbl
|
|
||||||
; CHECK: movzbl
|
|
||||||
; CHECK: subl
|
|
||||||
; CHECK: punpcklbw
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
|
|
||||||
ret <16 x i8> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
|
|
||||||
|
|
||||||
define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
|
|
||||||
; CHECK-LABEL: test_uabsdiff_v8i16_expand
|
|
||||||
; CHECK: pextrw
|
|
||||||
; CHECK: pextrw
|
|
||||||
; CHECK: subl
|
|
||||||
; CHECK: punpcklwd
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
|
|
||||||
ret <8 x i16> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
|
|
||||||
|
|
||||||
define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
|
|
||||||
; CHECK-LABEL: test_sabsdiff_v8i16_expand
|
|
||||||
; CHECK: psubw
|
|
||||||
; CHECK: pcmpgtw
|
|
||||||
; CHECK: pcmpeqd
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
|
||||||
; CHECK: por [[SRC2]], [[DST]]
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
|
|
||||||
ret <8 x i16> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
|
|
||||||
|
|
||||||
define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
|
|
||||||
; CHECK-LABEL: test_sabsdiff_v4i32_expand
|
|
||||||
; CHECK: psubd
|
|
||||||
; CHECK: pcmpgtd
|
|
||||||
; CHECK: pcmpeqd
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
|
||||||
; CHECK: por [[SRC2]], [[DST]]
|
|
||||||
; CHECK: retq
|
|
||||||
%1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
|
|
||||||
ret <4 x i32> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
|
|
||||||
|
|
||||||
define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
|
|
||||||
; CHECK-LABEL: test_uabsdiff_v4i32_expand
|
|
||||||
; CHECK: pshufd
|
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: subl
|
|
||||||
; CHECK: punpckldq
|
|
||||||
; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
|
|
||||||
; CHECK-DAG: movd %xmm0, [[DST:%.*]]
|
|
||||||
; CHECK: subl [[SRC]], [[DST]]
|
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: pshufd
|
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: punpckldq
|
|
||||||
; CHECK: movdqa
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
|
|
||||||
ret <4 x i32> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
|
|
||||||
|
|
||||||
define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
|
|
||||||
; CHECK-LABEL: test_sabsdiff_v2i32_expand
|
|
||||||
; CHECK: psubq
|
|
||||||
; CHECK: pcmpgtd
|
|
||||||
; CHECK: pcmpeqd
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
|
||||||
; CHECK: por [[SRC2]], [[DST]]
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
|
|
||||||
ret <2 x i32> %1
|
|
||||||
}
|
|
||||||
|
|
||||||
declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
|
|
||||||
|
|
||||||
define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
|
|
||||||
; CHECK-LABEL: test_sabsdiff_v2i64_expand
|
|
||||||
; CHECK: psubq
|
|
||||||
; CHECK: pcmpgtd
|
|
||||||
; CHECK: pcmpeqd
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
|
||||||
; CHECK: por [[SRC2]], [[DST]]
|
|
||||||
; CHECK: retq
|
|
||||||
|
|
||||||
%1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
|
|
||||||
ret <2 x i64> %1
|
|
||||||
}
|
|
|
@ -1,29 +0,0 @@
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
|
||||||
|
|
||||||
declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>)
|
|
||||||
|
|
||||||
define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) {
|
|
||||||
; CHECK-LABEL: test_sabsdiff_v16i16_expand:
|
|
||||||
; CHECK: # BB#0:
|
|
||||||
; CHECK: psubw
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK: pcmpgtw
|
|
||||||
; CHECK: movdqa
|
|
||||||
; CHECK: pandn
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK: psubw
|
|
||||||
; CHECK: pcmpeqd
|
|
||||||
; CHECK: pxor
|
|
||||||
; CHECK: pandn
|
|
||||||
; CHECK: por
|
|
||||||
; CHECK: pcmpgtw
|
|
||||||
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]]
|
|
||||||
; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]]
|
|
||||||
; CHECK: pandn [[SRC]], [[DST]]
|
|
||||||
; CHECK: por
|
|
||||||
; CHECK: movdqa
|
|
||||||
; CHECK: retq
|
|
||||||
%1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2)
|
|
||||||
ret <16 x i16> %1
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue