[PowerPC]Exploit P9 vabsdu for unsigned vselect patterns

For types v4i32/v8i16/v16i8, do the following transforms:
  (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) -> (vabsd a, b)
  (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) -> (vabsd a, b)
  (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) -> (vabsd a, b)
  (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) -> (vabsd a, b)

Differential Revision: https://reviews.llvm.org/D55812

llvm-svn: 349599
This commit is contained in:
Kewen Lin 2018-12-19 03:04:07 +00:00
parent 5dcd70990e
commit a6247e7cf4
3 changed files with 150 additions and 36 deletions

View File

@ -1085,6 +1085,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasP9Altivec()) {
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::VSELECT);
}
// Darwin long double math library functions have $LDBL128 appended.
@ -13267,6 +13268,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return DAGCombineBuildVector(N, DCI);
case ISD::ABS:
return combineABS(N, DCI);
case ISD::VSELECT:
return combineVSelect(N, DCI);
}
return SDValue();
@ -14597,3 +14600,65 @@ SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
}
// Recognize an unsigned absolute-difference idiom expressed as a vector
// select over two subtractions and fold it into a single PPCISD::VABSD node.
// Handled result types are v4i32/v8i16/v16i8; the recognized shapes are:
//   (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) -> (vabsd a, b)
//   (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) -> (vabsd a, b)
//   (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) -> (vabsd a, b)
//   (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) -> (vabsd a, b)
// Returns an empty SDValue when N does not match any of these shapes.
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SDValue Mask = N->getOperand(0);
  SDValue DiffAB = N->getOperand(1);
  SDValue DiffBA = N->getOperand(2);
  const EVT ResultVT = DiffAB.getValueType();

  // The select must pick between two subtractions under a SETCC mask.
  const bool ShapeMatches = Mask.getOpcode() == ISD::SETCC &&
                            DiffAB.getOpcode() == ISD::SUB &&
                            DiffBA.getOpcode() == ISD::SUB;
  if (!ShapeMatches)
    return SDValue();

  // ABSD is only available for v4i32/v8i16/v16i8.
  const bool TypeSupported = ResultVT == MVT::v4i32 ||
                             ResultVT == MVT::v8i16 ||
                             ResultVT == MVT::v16i8;
  if (!TypeSupported)
    return SDValue();

  // Only worthwhile if at least one of the three inputs has a single use, so
  // that the fold saves at least one dependent computation.
  if (!Mask.hasOneUse() && !DiffAB.hasOneUse() && !DiffBA.hasOneUse())
    return SDValue();

  // Only unsigned orderings are handled. For "less" predicates the roles of
  // the two select arms are exchanged so that the operand check below can
  // always be written in the "greater" form.
  const ISD::CondCode Pred = cast<CondCodeSDNode>(Mask.getOperand(2))->get();
  const bool IsUnsignedGreater = Pred == ISD::SETUGT || Pred == ISD::SETUGE;
  const bool IsUnsignedLess = Pred == ISD::SETULT || Pred == ISD::SETULE;
  if (!IsUnsignedGreater && !IsUnsignedLess)
    return SDValue();
  if (IsUnsignedLess)
    std::swap(DiffAB, DiffBA);

  // With the compare being (setcc A, B, pred), require
  //   DiffAB == A - B   and   DiffBA == B - A.
  SDValue A = Mask.getOperand(0);
  SDValue B = Mask.getOperand(1);
  const bool ArmsAreMirrored =
      DiffAB.getOperand(0) == A && DiffAB.getOperand(1) == B &&
      DiffBA.getOperand(0) == B && DiffBA.getOperand(1) == A;
  if (!ArmsAreMirrored)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  return DAG.getNode(PPCISD::VABSD, dl, ResultVT, A, B,
                     DAG.getTargetConstant(0, dl, MVT::i32));
}

View File

@ -1118,6 +1118,7 @@ namespace llvm {
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it

View File

@ -526,9 +526,10 @@ define <4 x i32> @absd_int32_ugt(<4 x i32>, <4 x i32>) {
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_ugt
; CHECK: vcmpgtuw
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuw
; CHECK-NOT: xxsel
; CHECK: vabsduw v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_ugt
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
@ -542,9 +543,10 @@ define <4 x i32> @absd_int32_uge(<4 x i32>, <4 x i32>) {
%6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_uge
; CHECK: vcmpgtuw
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuw
; CHECK-NOT: xxsel
; CHECK: vabsduw v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_uge
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
@ -558,9 +560,10 @@ define <4 x i32> @absd_int32_ult(<4 x i32>, <4 x i32>) {
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_ult
; CHECK: vcmpgtuw
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuw
; CHECK-NOT: xxsel
; CHECK: vabsduw v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_ult
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
@ -574,9 +577,10 @@ define <4 x i32> @absd_int32_ule(<4 x i32>, <4 x i32>) {
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_ule
; CHECK: vcmpgtuw
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuw
; CHECK-NOT: xxsel
; CHECK: vabsduw v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int32_ule
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
@ -590,9 +594,10 @@ define <8 x i16> @absd_int16_ugt(<8 x i16>, <8 x i16>) {
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_ugt
; CHECK: vcmpgtuh
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuh
; CHECK-NOT: xxsel
; CHECK: vabsduh v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_ugt
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
@ -606,9 +611,10 @@ define <8 x i16> @absd_int16_uge(<8 x i16>, <8 x i16>) {
%6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_uge
; CHECK: vcmpgtuh
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuh
; CHECK-NOT: xxsel
; CHECK: vabsduh v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_uge
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
@ -622,9 +628,10 @@ define <8 x i16> @absd_int16_ult(<8 x i16>, <8 x i16>) {
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_ult
; CHECK: vcmpgtuh
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuh
; CHECK-NOT: xxsel
; CHECK: vabsduh v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_ult
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
@ -638,9 +645,10 @@ define <8 x i16> @absd_int16_ule(<8 x i16>, <8 x i16>) {
%6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
ret <8 x i16> %6
; CHECK-LABEL: absd_int16_ule
; CHECK: vcmpgtuh
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtuh
; CHECK-NOT: xxsel
; CHECK: vabsduh v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int16_ule
; CHECK-PWR8: vcmpgtuh
; CHECK-PWR8: xxsel
@ -654,9 +662,10 @@ define <16 x i8> @absd_int8_ugt(<16 x i8>, <16 x i8>) {
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_ugt
; CHECK: vcmpgtub
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtub
; CHECK-NOT: xxsel
; CHECK: vabsdub v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_ugt
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
@ -670,9 +679,10 @@ define <16 x i8> @absd_int8_uge(<16 x i8>, <16 x i8>) {
%6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_uge
; CHECK: vcmpgtub
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtub
; CHECK-NOT: xxsel
; CHECK: vabsdub v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_uge
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
@ -686,9 +696,10 @@ define <16 x i8> @absd_int8_ult(<16 x i8>, <16 x i8>) {
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_ult
; CHECK: vcmpgtub
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtub
; CHECK-NOT: xxsel
; CHECK: vabsdub v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_ult
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
@ -702,15 +713,52 @@ define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
%6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
ret <16 x i8> %6
; CHECK-LABEL: absd_int8_ule
; CHECK: vcmpgtub
; CHECK: xxsel
; CHECK: blr
; CHECK-NOT: vcmpgtub
; CHECK-NOT: xxsel
; CHECK: vabsdub v2, v2, v3
; CHECK-NEXT: blr
; CHECK-PWR8-LABEL: absd_int8_ule
; CHECK-PWR8: vcmpgtub
; CHECK-PWR8: xxsel
; CHECK-PWR8: blr
}
; Some cases we are unable to optimize;
; check that the combine does not go beyond its intended scope.
; Negative test: the select arms are swapped relative to the compare (when
; %0 >u %1 the result is %1 - %0), so this is not an absolute difference and
; the compare + select sequence is expected instead of vabsduw.
define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
%3 = icmp ugt <4 x i32> %0, %1
%4 = sub <4 x i32> %0, %1
%5 = sub <4 x i32> %1, %0
%6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
ret <4 x i32> %6
; CHECK-LABEL: absd_int32_ugt_opp
; CHECK-NOT: vabsduw
; CHECK: vcmpgtuw
; CHECK: xxsel
; CHECK: blr
; CHECK-PWR8-LABEL: absd_int32_ugt_opp
; CHECK-PWR8: vcmpgtuw
; CHECK-PWR8: xxsel
; CHECK-PWR8: blr
}
; Negative test: same ugt / sub / sub / select shape as the absolute-difference
; pattern, but on <2 x i64>, which is outside the v4i32/v8i16/v16i8 element
; types the combine accepts; the compare + select sequence is expected.
define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) {
%3 = icmp ugt <2 x i64> %0, %1
%4 = sub <2 x i64> %0, %1
%5 = sub <2 x i64> %1, %0
%6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5
ret <2 x i64> %6
; CHECK-LABEL: absd_int64_ugt
; CHECK-NOT: vabsduw
; CHECK: vcmpgtud
; CHECK: xxsel
; CHECK: blr
; CHECK-PWR8-LABEL: absd_int64_ugt
; CHECK-PWR8: vcmpgtud
; CHECK-PWR8: xxsel
; CHECK-PWR8: blr
}
declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)