Codegen: Fix llvm.*absdiff semantic.
Fixes the overflow case of the llvm.*absdiff intrinsics, and also updates the tests and LangRef.rst accordingly. Differential Revision: http://reviews.llvm.org/D11678 llvm-svn: 248483
This commit is contained in:
parent
2720593ab4
commit
13f1dfdf2e
|
@ -10920,16 +10920,19 @@ This is an overloaded intrinsic. The loaded data is a vector of any integer bit
|
|||
Overview:
|
||||
"""""""""
|
||||
|
||||
The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference of
|
||||
the two operands, treating them both as unsigned integers.
|
||||
The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference
|
||||
of the two operands, treating them both as unsigned integers. The intermediate
|
||||
calculations are computed using infinitely precise unsigned arithmetic. The final
|
||||
result will be truncated to the given type.
|
||||
|
||||
The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of
|
||||
the two operands, treating them both as signed integers.
|
||||
the two operands, treating them both as signed integers. If the result overflows, the
|
||||
behavior is undefined.
|
||||
|
||||
.. note::
|
||||
|
||||
These intrinsics are primarily used during the code generation stage of compilation.
|
||||
They are generated by compiler passes such as the Loop and SLP vectorizers.it is not
|
||||
They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
|
||||
recommended for users to create them manually.
|
||||
|
||||
Arguments:
|
||||
|
@ -10946,19 +10949,19 @@ The expression::
|
|||
|
||||
is equivalent to::
|
||||
|
||||
%sub = sub <4 x i32> %a, %b
|
||||
%ispos = icmp ugt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%neg = sub <4 x i32> zeroinitializer, %sub
|
||||
%1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
|
||||
%1 = zext <4 x i32> %a to <4 x i64>
|
||||
%2 = zext <4 x i32> %b to <4 x i64>
|
||||
%sub = sub <4 x i64> %1, %2
|
||||
%trunc = trunc <4 x i64> %sub to <4 x i32>
|
||||
|
||||
Similarly the expression::
|
||||
and the expression::
|
||||
|
||||
call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
|
||||
|
||||
is equivalent to::
|
||||
|
||||
%sub = sub nsw <4 x i32> %a, %b
|
||||
%ispos = icmp sgt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%ispos = icmp sge <4 x i32> %sub, zeroinitializer
|
||||
%neg = sub nsw <4 x i32> zeroinitializer, %sub
|
||||
%1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
|
||||
|
||||
|
|
|
@ -723,24 +723,30 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
|
|||
|
||||
SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
|
||||
SDLoc dl(Op);
|
||||
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
SDValue Op1 = Op.getOperand(1);
|
||||
EVT VT = Op.getValueType();
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoSignedWrap(Op->getOpcode() == ISD::SABSDIFF);
|
||||
|
||||
Tmp2 = Op.getOperand(0);
|
||||
Tmp3 = Op.getOperand(1);
|
||||
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp3, &Flags);
|
||||
Tmp2 =
|
||||
DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Tmp1, &Flags);
|
||||
Tmp4 = DAG.getNode(
|
||||
ISD::SETCC, dl,
|
||||
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Tmp2,
|
||||
DAG.getConstant(0, dl, VT),
|
||||
DAG.getCondCode(Op->getOpcode() == ISD::SABSDIFF ? ISD::SETLT
|
||||
: ISD::SETULT));
|
||||
Tmp1 = DAG.getNode(ISD::VSELECT, dl, VT, Tmp4, Tmp1, Tmp2);
|
||||
return Tmp1;
|
||||
// For unsigned intrinsic, promote the type to handle unsigned overflow.
|
||||
bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF);
|
||||
if (isUabsdiff) {
|
||||
VT = VT.widenIntegerVectorElementType(*DAG.getContext());
|
||||
Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0);
|
||||
Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1);
|
||||
}
|
||||
|
||||
SDNodeFlags Flags;
|
||||
Flags.setNoSignedWrap(!isUabsdiff);
|
||||
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags);
|
||||
if (isUabsdiff)
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub);
|
||||
|
||||
SDValue Cmp =
|
||||
DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(),
|
||||
*DAG.getContext(), VT),
|
||||
Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE));
|
||||
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags);
|
||||
return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg);
|
||||
}
|
||||
|
||||
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
|
||||
|
|
|
@ -0,0 +1,181 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
||||
|
||||
declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
|
||||
|
||||
define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
|
||||
; CHECK-LABEL: test_uabsdiff_v4i8_expand
|
||||
; CHECK: pshufd
|
||||
; CHECK: movd
|
||||
; CHECK: subl
|
||||
; CHECK: punpckldq
|
||||
; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
|
||||
; CHECK-DAG: movd %xmm0, [[DST:%.*]]
|
||||
; CHECK: subl [[SRC]], [[DST]]
|
||||
; CHECK: movd
|
||||
; CHECK: pshufd
|
||||
; CHECK: movd
|
||||
; CHECK: punpckldq
|
||||
; CHECK: movdqa
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
|
||||
ret <4 x i8> %1
|
||||
}
|
||||
|
||||
declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
|
||||
|
||||
define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
|
||||
; CHECK-LABEL: test_sabsdiff_v4i8_expand
|
||||
; CHECK: psubd
|
||||
; CHECK: pcmpgtd
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
||||
; CHECK: por [[SRC2]], [[DST]]
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
|
||||
ret <4 x i8> %1
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
|
||||
; CHECK-LABEL: test_sabsdiff_v8i8_expand
|
||||
; CHECK: psubw
|
||||
; CHECK: pcmpgtw
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
||||
; CHECK: por [[SRC2]], [[DST]]
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
|
||||
ret <8 x i8> %1
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
|
||||
; CHECK-LABEL: test_uabsdiff_v16i8_expand
|
||||
; CHECK: movd
|
||||
; CHECK: movzbl
|
||||
; CHECK: movzbl
|
||||
; CHECK: subl
|
||||
; CHECK: punpcklbw
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
|
||||
; CHECK-LABEL: test_uabsdiff_v8i16_expand
|
||||
; CHECK: pextrw
|
||||
; CHECK: pextrw
|
||||
; CHECK: subl
|
||||
; CHECK: punpcklwd
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
|
||||
; CHECK-LABEL: test_sabsdiff_v8i16_expand
|
||||
; CHECK: psubw
|
||||
; CHECK: pcmpgtw
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
||||
; CHECK: por [[SRC2]], [[DST]]
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: test_sabsdiff_v4i32_expand
|
||||
; CHECK: psubd
|
||||
; CHECK: pcmpgtd
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
||||
; CHECK: por [[SRC2]], [[DST]]
|
||||
; CHECK: retq
|
||||
%1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: test_uabsdiff_v4i32_expand
|
||||
; CHECK: pshufd
|
||||
; CHECK: movd
|
||||
; CHECK: subl
|
||||
; CHECK: punpckldq
|
||||
; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
|
||||
; CHECK-DAG: movd %xmm0, [[DST:%.*]]
|
||||
; CHECK: subl [[SRC]], [[DST]]
|
||||
; CHECK: movd
|
||||
; CHECK: pshufd
|
||||
; CHECK: movd
|
||||
; CHECK: punpckldq
|
||||
; CHECK: movdqa
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
|
||||
|
||||
define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
|
||||
; CHECK-LABEL: test_sabsdiff_v2i32_expand
|
||||
; CHECK: psubq
|
||||
; CHECK: pcmpgtd
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
||||
; CHECK: por [[SRC2]], [[DST]]
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
|
||||
ret <2 x i32> %1
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
|
||||
|
||||
define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
|
||||
; CHECK-LABEL: test_sabsdiff_v2i64_expand
|
||||
; CHECK: psubq
|
||||
; CHECK: pcmpgtd
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
|
||||
; CHECK: por [[SRC2]], [[DST]]
|
||||
; CHECK: retq
|
||||
|
||||
%1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
|
||||
ret <2 x i64> %1
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
||||
|
||||
declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>)
|
||||
|
||||
define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) {
|
||||
; CHECK-LABEL: test_sabsdiff_v16i16_expand:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK: psubw
|
||||
; CHECK: pxor
|
||||
; CHECK: pcmpgtw
|
||||
; CHECK: movdqa
|
||||
; CHECK: pandn
|
||||
; CHECK: pxor
|
||||
; CHECK: psubw
|
||||
; CHECK: pcmpeqd
|
||||
; CHECK: pxor
|
||||
; CHECK: pandn
|
||||
; CHECK: por
|
||||
; CHECK: pcmpgtw
|
||||
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]]
|
||||
; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]]
|
||||
; CHECK: pandn [[SRC]], [[DST]]
|
||||
; CHECK: por
|
||||
; CHECK: movdqa
|
||||
; CHECK: retq
|
||||
%1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2)
|
||||
ret <16 x i16> %1
|
||||
}
|
||||
|
Loading…
Reference in New Issue