[SelectionDAG] swap select_cc operands to enable folding

The DAGCombiner tries to SimplifySelectCC as follows:
  select_cc(x, y, 16, 0, cc) -> shl(zext(set_cc(x, y, cc)), 4)
It cannot cope with the case where the two select operands are reordered:
  select_cc(x, y, 0, 16, cc)
In that case we just need to swap the operands and invert the condition code:
  select_cc(x, y, 16, 0, ~cc)
Differential Revision: https://reviews.llvm.org/D53236
llvm-svn: 346484
2018-11-09 11:09:40 +00:00 · 2018-11-09 11:09:40 +00:00 · e15c982f6d
parent 52578f95c9
commit e15c982f6d
3 changed files with 95 additions and 45 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -18173,6 +18173,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
@ -18273,48 +18274,49 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
  }

  // fold select C, 16, 0 -> shl C, 4
-  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
+  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+  if ((Fold || Swap) &&
      TLI.getBooleanContents(N0.getValueType()) ==
-          TargetLowering::ZeroOrOneBooleanContent) {
+          TargetLowering::ZeroOrOneBooleanContent &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(ISD::SETCC, N0.getValueType()))) {
+
+    if (Swap) {
+      CC = ISD::getSetCCInverse(CC, N0.getValueType().isInteger());
+      std::swap(N2C, N3C);
+    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

-    // Get a SetCC of the condition
-    // NOTE: Don't create a SETCC if it's not legal on this target.
-    if (!LegalOperations ||
-        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
-      SDValue Temp, SCC;
-      // cast from setcc result type to select result type
-      if (LegalTypes) {
-        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
-                            N0, N1, CC);
-        if (N2.getValueType().bitsLT(SCC.getValueType()))
-          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
-                                        N2.getValueType());
-        else
-          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
-                             N2.getValueType(), SCC);
-      } else {
-        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
-        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
-                           N2.getValueType(), SCC);
-      }
-
-      AddToWorklist(SCC.getNode());
-      AddToWorklist(Temp.getNode());
-
-      if (N2C->isOne())
-        return Temp;
-
-      // shl setcc result by log2 n2c
-      return DAG.getNode(
-          ISD::SHL, DL, N2.getValueType(), Temp,
-          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
-                          getShiftAmountTy(Temp.getValueType())));
+    SDValue Temp, SCC;
+    // zext (setcc n0, n1)
+    if (LegalTypes) {
+      SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+      if (N2.getValueType().bitsLT(SCC.getValueType()))
+        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC);
+    } else {
+      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC);
    }
+
+    AddToWorklist(SCC.getNode());
+    AddToWorklist(Temp.getNode());
+
+    if (N2C->isOne())
+      return Temp;
+
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
+                                       SDLoc(Temp),
+                                       getShiftAmountTy(Temp.getValueType())));
  }

  // Check to see if this is an integer abs.
--- a/llvm/test/CodeGen/AArch64/select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/select_cc.ll
@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+define i64 @select_ogt_float(float %a, float %b) {
+; CHECK-LABEL: select_ogt_float:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w8, gt
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = fcmp ogt float %a, %b
+  %sel = select i1 %cc, i64 4, i64 0
+  ret i64 %sel
+}
+
+define i64 @select_ule_float_inverse(float %a, float %b) {
+; CHECK-LABEL: select_ule_float_inverse:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w8, gt
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = fcmp ule float %a, %b
+  %sel = select i1 %cc, i64 0, i64 4
+  ret i64 %sel
+}
+
+define i64 @select_eq_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: select_eq_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = icmp eq i32 %a, %b
+  %sel = select i1 %cc, i64 4, i64 0
+  ret i64 %sel
+}
+
+define i64 @select_ne_i32_inverse(i32 %a, i32 %b) {
+; CHECK-LABEL: select_ne_i32_inverse:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    lsl x0, x8, #2
+; CHECK-NEXT:    ret
+entry:
+  %cc = icmp ne i32 %a, %b
+  %sel = select i1 %cc, i64 0, i64 4
+  ret i64 %sel
+}
--- a/llvm/test/CodeGen/Thumb/branchless-cmp.ll
+++ b/llvm/test/CodeGen/Thumb/branchless-cmp.ll
@ -74,23 +74,17 @@ entry:
 ; CHECK-NEXT: lsls	r0, r1, #2
 }

-; FIXME: This one hasn't changed actually
-; but could look like test3b
 define i32 @test4a(i32 %a, i32 %b) {
 entry:
  %cmp = icmp ne i32 %a, %b
  %cond = select i1 %cmp, i32 0, i32 4
  ret i32 %cond
 ; CHECK-LABEL: test4a:
-; CHECK: bb.0:
-; CHECK-NEXT:  cmp     r0, r1
-; CHECK-NEXT:  bne     .LBB6_2
-; CHECK-NEXT: bb.1:
-; CHECK-NEXT:  movs    r0, #4
-; CHECK-NEXT:  bx      lr
-; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT:  movs    r0, #0
-; CHECK-NEXT:  bx      lr
+; CHECK-NOT: b{{(ne)|(eq)}}
+; CHECK:      subs	r0, r0, r1
+; CHECK-NEXT: rsbs	r1, r0, #0
+; CHECK-NEXT: adcs	r1, r0
+; CHECK-NEXT: lsls	r0, r1, #2
 }

 define i32 @test4b(i32 %a, i32 %b) {