Codegen: Fix llvm.*absdiff semantic.

Fixes the overflow case of the llvm.*absdiff intrinsics; also updates the tests and LangRef.rst accordingly.

Differential Revision: http://reviews.llvm.org/D11678

llvm-svn: 248483
This commit is contained in:
Mohammad Shahid 2015-09-24 10:35:03 +00:00
parent 2720593ab4
commit 13f1dfdf2e
4 changed files with 245 additions and 26 deletions

View File

@ -10920,16 +10920,19 @@ This is an overloaded intrinsic. The loaded data is a vector of any integer bit
Overview:
"""""""""
The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference of
the two operands, treating them both as unsigned integers.
The ``llvm.uabsdiff`` intrinsic returns a vector result of the absolute difference
of the two operands, treating them both as unsigned integers. The intermediate
calculations are computed using infinitely precise unsigned arithmetic. The final
result will be truncated to the given type.
The ``llvm.sabsdiff`` intrinsic returns a vector result of the absolute difference of
the two operands, treating them both as signed integers.
the two operands, treating them both as signed integers. If the result overflows, the
behavior is undefined.
.. note::
These intrinsics are primarily used during the code generation stage of compilation.
They are generated by compiler passes such as the Loop and SLP vectorizers.it is not
They are generated by compiler passes such as the Loop and SLP vectorizers. It is not
recommended for users to create them manually.
Arguments:
@ -10946,19 +10949,19 @@ The expression::
is equivalent to::
%sub = sub <4 x i32> %a, %b
%ispos = icmp ugt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg = sub <4 x i32> zeroinitializer, %sub
%1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg
%1 = zext <4 x i32> %a to <4 x i64>
%2 = zext <4 x i32> %b to <4 x i64>
%sub = sub <4 x i64> %1, %2
%trunc = trunc <4 x i64> %sub to <4 x i32>
Similarly the expression::
and the expression::
call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a, <4 x i32> %b)
is equivalent to::
%sub = sub nsw <4 x i32> %a, %b
%ispos = icmp sgt <4 x i32> %sub, <i32 -1, i32 -1, i32 -1, i32 -1>
%ispos = icmp sge <4 x i32> %sub, zeroinitializer
%neg = sub nsw <4 x i32> zeroinitializer, %sub
%1 = select <4 x i1> %ispos, <4 x i32> %sub, <4 x i32> %neg

View File

@ -723,24 +723,30 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
// Expands an ISD::SABSDIFF / ISD::UABSDIFF node into SUB, SETCC and VSELECT
// nodes (plus ZERO_EXTEND / TRUNCATE on the unsigned path).
// NOTE(review): this listing appears to contain both the pre-change and the
// post-change body of the function (Flags is declared twice and statements
// follow the first return) -- confirm which lines are live before relying on it.
SDValue VectorLegalizer::ExpandABSDIFF(SDValue Op) {
SDLoc dl(Op);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT VT = Op.getValueType();
// Request the no-signed-wrap flag on the subtractions only for the signed
// opcode.
SDNodeFlags Flags;
Flags.setNoSignedWrap(Op->getOpcode() == ISD::SABSDIFF);
Tmp2 = Op.getOperand(0);
Tmp3 = Op.getOperand(1);
// Tmp1 = operand0 - operand1.
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp3, &Flags);
// Tmp2 = 0 - Tmp1, i.e. the negated difference.
Tmp2 =
DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Tmp1, &Flags);
// Tmp4 = (Tmp2 < 0), using a signed less-than for SABSDIFF and an unsigned
// less-than otherwise.
Tmp4 = DAG.getNode(
ISD::SETCC, dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Tmp2,
DAG.getConstant(0, dl, VT),
DAG.getCondCode(Op->getOpcode() == ISD::SABSDIFF ? ISD::SETLT
: ISD::SETULT));
// Select Tmp1 in lanes where the compare is true and Tmp2 elsewhere.
Tmp1 = DAG.getNode(ISD::VSELECT, dl, VT, Tmp4, Tmp1, Tmp2);
return Tmp1;
// For unsigned intrinsic, promote the type to handle unsigned overflow.
bool isUabsdiff = (Op->getOpcode() == ISD::UABSDIFF);
if (isUabsdiff) {
// Widen the element type and zero-extend both operands into it, so the
// subtraction below is performed in the wider type.
VT = VT.widenIntegerVectorElementType(*DAG.getContext());
Op0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op0);
Op1 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op1);
}
// The no-signed-wrap flag is set only on the signed (non-UABSDIFF) path.
SDNodeFlags Flags;
Flags.setNoSignedWrap(!isUabsdiff);
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op0, Op1, &Flags);
// Unsigned path: truncate the widened difference back to the node's original
// value type and return it; no compare/select is emitted here.
if (isUabsdiff)
return DAG.getNode(ISD::TRUNCATE, dl, Op.getValueType(), Sub);
// Signed path: Cmp = (Sub >= 0) using a signed comparison.
SDValue Cmp =
DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), VT),
Sub, DAG.getConstant(0, dl, VT), DAG.getCondCode(ISD::SETGE));
// Neg = 0 - Sub; the result takes Sub where Cmp is true and Neg elsewhere.
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Sub, &Flags);
return DAG.getNode(ISD::VSELECT, dl, VT, Cmp, Sub, Neg);
}
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {

View File

@ -0,0 +1,181 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; Lowering test for llvm.uabsdiff on <4 x i8>. The expected output moves
; elements out of the xmm registers with movd, subtracts them with scalar subl
; (the second subl takes its operands from %xmm1 and %xmm0), and repacks the
; results with punpckldq before returning.
declare <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8>, <4 x i8>)
define <4 x i8> @test_uabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
; CHECK-LABEL: test_uabsdiff_v4i8_expand
; CHECK: pshufd
; CHECK: movd
; CHECK: subl
; CHECK: punpckldq
; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
; CHECK-DAG: movd %xmm0, [[DST:%.*]]
; CHECK: subl [[SRC]], [[DST]]
; CHECK: movd
; CHECK: pshufd
; CHECK: movd
; CHECK: punpckldq
; CHECK: movdqa
; CHECK: retq
%1 = call <4 x i8> @llvm.uabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
ret <4 x i8> %1
}
; Lowering test for llvm.sabsdiff on <4 x i8>. The expected output stays in
; vector registers: a psubd, a pcmpgtd compare, a second psubd, and a
; pandn/pandn/por sequence that combines the two candidates.
declare <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8>, <4 x i8>)
define <4 x i8> @test_sabsdiff_v4i8_expand(<4 x i8> %a1, <4 x i8> %a2) {
; CHECK-LABEL: test_sabsdiff_v4i8_expand
; CHECK: psubd
; CHECK: pcmpgtd
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
; CHECK: por [[SRC2]], [[DST]]
; CHECK: retq
%1 = call <4 x i8> @llvm.sabsdiff.v4i8(<4 x i8> %a1, <4 x i8> %a2)
ret <4 x i8> %1
}
; Lowering test for llvm.sabsdiff on <8 x i8>. The expected output uses
; word-sized vector operations (psubw, pcmpgtw) followed by a pandn/pandn/por
; sequence that combines the difference and its negation.
declare <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_sabsdiff_v8i8_expand(<8 x i8> %a1, <8 x i8> %a2) {
; CHECK-LABEL: test_sabsdiff_v8i8_expand
; CHECK: psubw
; CHECK: pcmpgtw
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
; CHECK: por [[SRC2]], [[DST]]
; CHECK: retq
%1 = call <8 x i8> @llvm.sabsdiff.v8i8(<8 x i8> %a1, <8 x i8> %a2)
ret <8 x i8> %1
}
; Lowering test for llvm.uabsdiff on <16 x i8>. The expected output
; zero-extends bytes with movzbl, subtracts with scalar subl, and repacks the
; results with punpcklbw.
declare <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_uabsdiff_v16i8_expand(<16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_uabsdiff_v16i8_expand
; CHECK: movd
; CHECK: movzbl
; CHECK: movzbl
; CHECK: subl
; CHECK: punpcklbw
; CHECK: retq
%1 = call <16 x i8> @llvm.uabsdiff.v16i8(<16 x i8> %a1, <16 x i8> %a2)
ret <16 x i8> %1
}
; Lowering test for llvm.uabsdiff on <8 x i16>. The expected output extracts
; words with pextrw, subtracts with scalar subl, and repacks the results with
; punpcklwd.
declare <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_uabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_uabsdiff_v8i16_expand
; CHECK: pextrw
; CHECK: pextrw
; CHECK: subl
; CHECK: punpcklwd
; CHECK: retq
%1 = call <8 x i16> @llvm.uabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
ret <8 x i16> %1
}
; Lowering test for llvm.sabsdiff on <8 x i16>. The expected output uses
; word-sized vector operations (psubw, pcmpgtw) followed by a pandn/pandn/por
; sequence that combines the difference and its negation.
declare <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_sabsdiff_v8i16_expand(<8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_sabsdiff_v8i16_expand
; CHECK: psubw
; CHECK: pcmpgtw
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
; CHECK: por [[SRC2]], [[DST]]
; CHECK: retq
%1 = call <8 x i16> @llvm.sabsdiff.v8i16(<8 x i16> %a1, <8 x i16> %a2)
ret <8 x i16> %1
}
; Lowering test for llvm.sabsdiff on <4 x i32>. The expected output uses
; doubleword vector operations (psubd, pcmpgtd) followed by a pandn/pandn/por
; sequence that combines the difference and its negation.
declare <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_sabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_sabsdiff_v4i32_expand
; CHECK: psubd
; CHECK: pcmpgtd
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK-DAG: psubd {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
; CHECK: por [[SRC2]], [[DST]]
; CHECK: retq
%1 = call <4 x i32> @llvm.sabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
ret <4 x i32> %1
}
; Lowering test for llvm.uabsdiff on <4 x i32>. The expected output moves
; elements out of the xmm registers with movd, subtracts them with scalar subl
; (the second subl takes its operands from %xmm1 and %xmm0), and repacks the
; results with punpckldq before returning.
declare <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_uabsdiff_v4i32_expand(<4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_uabsdiff_v4i32_expand
; CHECK: pshufd
; CHECK: movd
; CHECK: subl
; CHECK: punpckldq
; CHECK-DAG: movd %xmm1, [[SRC:%.*]]
; CHECK-DAG: movd %xmm0, [[DST:%.*]]
; CHECK: subl [[SRC]], [[DST]]
; CHECK: movd
; CHECK: pshufd
; CHECK: movd
; CHECK: punpckldq
; CHECK: movdqa
; CHECK: retq
%1 = call <4 x i32> @llvm.uabsdiff.v4i32(<4 x i32> %a1, <4 x i32> %a2)
ret <4 x i32> %1
}
; Lowering test for llvm.sabsdiff on <2 x i32>. The expected output subtracts
; with quadword psubq, compares with pcmpgtd, and combines the difference and
; its negation with a pandn/pandn/por sequence.
declare <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_sabsdiff_v2i32_expand(<2 x i32> %a1, <2 x i32> %a2) {
; CHECK-LABEL: test_sabsdiff_v2i32_expand
; CHECK: psubq
; CHECK: pcmpgtd
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
; CHECK: por [[SRC2]], [[DST]]
; CHECK: retq
%1 = call <2 x i32> @llvm.sabsdiff.v2i32(<2 x i32> %a1, <2 x i32> %a2)
ret <2 x i32> %1
}
; Lowering test for llvm.sabsdiff on <2 x i64>. The expected output subtracts
; with quadword psubq, compares with pcmpgtd, and combines the difference and
; its negation with a pandn/pandn/por sequence.
declare <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_sabsdiff_v2i64_expand(<2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_sabsdiff_v2i64_expand
; CHECK: psubq
; CHECK: pcmpgtd
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK-DAG: psubq {{%xmm[0-9]+}}, [[SRC1:%xmm[0-9]+]]
; CHECK-DAG: pandn {{%xmm[0-9]+}}, [[SRC2:%xmm[0-9]+]]
; CHECK-DAG: pandn [[SRC1]], [[DST:%xmm[0-9]+]]
; CHECK: por [[SRC2]], [[DST]]
; CHECK: retq
%1 = call <2 x i64> @llvm.sabsdiff.v2i64(<2 x i64> %a1, <2 x i64> %a2)
ret <2 x i64> %1
}

View File

@ -0,0 +1,29 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; Lowering test for llvm.sabsdiff on <16 x i16>. The expected output contains
; the psubw / pcmpgtw / pandn / por pattern twice, ending with a pandn whose
; operands come from a psubw result and a pxor result.
; NOTE(review): the repetition presumably reflects the 256-bit vector being
; split into two 128-bit halves -- confirm against the generated assembly.
declare <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16>, <16 x i16>)
define <16 x i16> @test_sabsdiff_v16i16_expand(<16 x i16> %a1, <16 x i16> %a2) {
; CHECK-LABEL: test_sabsdiff_v16i16_expand:
; CHECK: # BB#0:
; CHECK: psubw
; CHECK: pxor
; CHECK: pcmpgtw
; CHECK: movdqa
; CHECK: pandn
; CHECK: pxor
; CHECK: psubw
; CHECK: pcmpeqd
; CHECK: pxor
; CHECK: pandn
; CHECK: por
; CHECK: pcmpgtw
; CHECK-DAG: psubw {{%xmm[0-9]+}}, [[SRC:%xmm[0-9]+]]
; CHECK-DAG: pxor {{%xmm[0-9]+}}, [[DST:%xmm[0-9]+]]
; CHECK: pandn [[SRC]], [[DST]]
; CHECK: por
; CHECK: movdqa
; CHECK: retq
%1 = call <16 x i16> @llvm.sabsdiff.v16i16(<16 x i16> %a1, <16 x i16> %a2)
ret <16 x i16> %1
}