From 23bcf06a15e5ade824fdd6d3290a2a011d721dcc Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 19 Apr 2018 14:24:31 +0000 Subject: [PATCH] [Hexagon] Add/fix patterns for 32/64-bit vector compares and logical ops llvm-svn: 330330 --- .../Target/Hexagon/HexagonISelLowering.cpp | 95 ++++---- llvm/lib/Target/Hexagon/HexagonPatterns.td | 91 ++++--- llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll | 6 +- llvm/test/CodeGen/Hexagon/vect/bit4x8.ll | 27 +++ llvm/test/CodeGen/Hexagon/vect/setcc-not.ll | 102 ++++++++ llvm/test/CodeGen/Hexagon/vect/setcc-v32.ll | 224 ++++++++++++++++++ 6 files changed, 444 insertions(+), 101 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/vect/bit4x8.ll create mode 100644 llvm/test/CodeGen/Hexagon/vect/setcc-not.ll create mode 100644 llvm/test/CodeGen/Hexagon/vect/setcc-v32.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 91af9e228eaa..40740f1e1564 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -803,63 +803,62 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV)); } -static bool isSExtFree(SDValue N) { - // A sign-extend of a truncate of a sign-extend is free. - if (N.getOpcode() == ISD::TRUNCATE && - N.getOperand(0).getOpcode() == ISD::AssertSext) - return true; - // We have sign-extended loads. 
- if (N.getOpcode() == ISD::LOAD) - return true; - return false; -} - SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); + const SDLoc &dl(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); - SDValue Cmp = Op.getOperand(2); - ISD::CondCode CC = cast(Cmp)->get(); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + MVT ResTy = ty(Op); + MVT OpTy = ty(LHS); - EVT VT = Op.getValueType(); - EVT LHSVT = LHS.getValueType(); - EVT RHSVT = RHS.getValueType(); - - if (LHSVT == MVT::v2i16) { - assert(CC == ISD::SETEQ || CC == ISD::SETNE || - ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC)); - unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND; - SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS); - SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS); - SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp); - return SC; + if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) { + MVT ElemTy = OpTy.getVectorElementType(); + assert(ElemTy.isScalarInteger()); + MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()), + OpTy.getVectorNumElements()); + return DAG.getSetCC(dl, ResTy, + DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy), + DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC); } // Treat all other vector types as legal. - if (VT.isVector()) + if (ResTy.isVector()) return Op; - // Equals and not equals should use sign-extend, not zero-extend, since - // we can represent small negative values in the compare instructions. + // Comparisons of short integers should use sign-extend, not zero-extend, + // since we can represent small negative values in the compare instructions. // The LLVM default is to use zero-extend arbitrarily in these cases. 
- if ((CC == ISD::SETEQ || CC == ISD::SETNE) && - (RHSVT == MVT::i8 || RHSVT == MVT::i16) && - (LHSVT == MVT::i8 || LHSVT == MVT::i16)) { + auto isSExtFree = [this](SDValue N) { + switch (N.getOpcode()) { + case ISD::TRUNCATE: { + // A sign-extend of a truncate of a sign-extend is free. + SDValue Op = N.getOperand(0); + if (Op.getOpcode() != ISD::AssertSext) + return false; + MVT OrigTy = cast(Op.getOperand(1))->getVT().getSimpleVT(); + unsigned ThisBW = ty(N).getSizeInBits(); + unsigned OrigBW = OrigTy.getSizeInBits(); + // The type that was sign-extended to get the AssertSext must be + // no wider than the type of N (so that N still has the same value + // as the original). + return ThisBW >= OrigBW; + } + case ISD::LOAD: + // We have sign-extended loads. + return true; + } + return false; + }; + + if (OpTy == MVT::i8 || OpTy == MVT::i16) { ConstantSDNode *C = dyn_cast(RHS); - if (C && C->getAPIntValue().isNegative()) { - LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); - RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); - return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), - LHS, RHS, Op.getOperand(2)); - } - if (isSExtFree(LHS) || isSExtFree(RHS)) { - LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); - RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); - return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), - LHS, RHS, Op.getOperand(2)); - } + bool IsNegative = C && C->getAPIntValue().isNegative(); + if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS)) + return DAG.getSetCC(dl, ResTy, + DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32), + DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC); } + return SDValue(); } @@ -1306,8 +1305,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BlockAddress, MVT::i32, Custom); // Hexagon needs to optimize cases with negative constants. 
- setOperationAction(ISD::SETCC, MVT::i8, Custom); - setOperationAction(ISD::SETCC, MVT::i16, Custom); + setOperationAction(ISD::SETCC, MVT::i8, Custom); + setOperationAction(ISD::SETCC, MVT::i16, Custom); + setOperationAction(ISD::SETCC, MVT::v4i8, Custom); + setOperationAction(ISD::SETCC, MVT::v2i16, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex. setOperationAction(ISD::VASTART, MVT::Other, Custom); diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 6806981ab588..91e03b561d83 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -616,27 +616,40 @@ def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; -def: Pat<(i1 (setne I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setle I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setule I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>; -def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>; +class OpmR_RR_pat + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (Output RsPred:$Rs, RtPred:$Rt)>; -def: Pat<(i1 (setle I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>; -def: Pat<(i1 (setne I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>; -def: Pat<(i1 (setge I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>; -def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>; -def: Pat<(i1 (setule I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>; +class Outn + : OutPatFrag<(ops node:$Rs, node:$Rt), + (C2_not (MI $Rs, $Rt))>; + +def: OpmR_RR_pat, setne, i1, I32>; +def: OpmR_RR_pat, setle, i1, I32>; +def: OpmR_RR_pat, setule, i1, I32>; +def: OpmR_RR_pat, RevCmp, i1, I32>; +def: 
OpmR_RR_pat, RevCmp, i1, I32>; +def: OpmR_RR_pat, setne, i1, I64>; +def: OpmR_RR_pat, setle, i1, I64>; +def: OpmR_RR_pat, setule, i1, I64>; +def: OpmR_RR_pat, RevCmp, i1, I64>; +def: OpmR_RR_pat, RevCmp, i1, I64>; +def: OpmR_RR_pat, setne, v8i1, V8I8>; +def: OpmR_RR_pat, setle, v8i1, V8I8>; +def: OpmR_RR_pat, setule, v8i1, V8I8>; +def: OpmR_RR_pat, RevCmp, v8i1, V8I8>; +def: OpmR_RR_pat, RevCmp, v8i1, V8I8>; +def: OpmR_RR_pat, setne, v4i1, V4I16>; +def: OpmR_RR_pat, setle, v4i1, V4I16>; +def: OpmR_RR_pat, setule, v4i1, V4I16>; +def: OpmR_RR_pat, RevCmp, v4i1, V4I16>; +def: OpmR_RR_pat, RevCmp, v4i1, V4I16>; +def: OpmR_RR_pat, setne, v2i1, V2I32>; +def: OpmR_RR_pat, setle, v2i1, V2I32>; +def: OpmR_RR_pat, setule, v2i1, V2I32>; +def: OpmR_RR_pat, RevCmp, v2i1, V2I32>; +def: OpmR_RR_pat, RevCmp, v2i1, V2I32>; let AddedComplexity = 100 in { def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), @@ -703,34 +716,12 @@ def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>; def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>; def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; -def: Pat<(v4i1 (seteq V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(v4i1 (setgt V4I8:$Rs, V4I8:$Rt)), - (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(v4i1 (setugt V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(v2i1 (seteq V2I16:$Rs, V2I16:$Rt)), - (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(v2i1 (setgt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(v2i1 (setugt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), - (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; - // Floating-point comparisons with checks for ordered/unordered status. 
class T3 : OutPatFrag<(ops node:$Rs, node:$Rt), (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; -class OpmR_RR_pat - : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), - (Output RsPred:$Rs, RtPred:$Rt)>; - class Cmpuf: T3; class Cmpud: T3; @@ -753,10 +744,6 @@ let Predicates = [HasV5T] in { def: OpmR_RR_pat, setune, i1, F64>; } -class Outn - : OutPatFrag<(ops node:$Rs, node:$Rt), - (C2_not (MI $Rs, $Rt))>; - let Predicates = [HasV5T] in { def: OpmR_RR_pat, setone, i1, F32>; def: OpmR_RR_pat, setne, i1, F32>; @@ -1221,18 +1208,20 @@ def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; - def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat; def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat; def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; diff --git a/llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll index b9cbb2e0fa47..f86263534f8c 100644 --- a/llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll +++ b/llvm/test/CodeGen/Hexagon/isel-zext-vNi1.ll @@ -5,9 +5,9 @@ ; changes, the tests should be changed as well. 
; CHECK-LABEL: f0: -; CHECK-DAG: r[[D00:([0-9]+:[0-9]+)]] = combine(#0,r0) -; CHECK-DAG: r[[D01:([0-9]+:[0-9]+)]] = combine(#0,r1) -; CHECK: p[[P00:[0-3]]] = vcmpb.gt(r[[D01]],r[[D00]]) +; CHECK-DAG: r[[D00:([0-9]+:[0-9]+)]] = vsxtbh(r0) +; CHECK-DAG: r[[D01:([0-9]+:[0-9]+)]] = vsxtbh(r1) +; CHECK: p[[P00:[0-3]]] = vcmph.gt(r[[D01]],r[[D00]]) ; CHECK: r{{[0-9]+}}:[[R00:[0-9]+]] = mask(p[[P00]]) ; CHECK: r0 = and(r[[R00]],##16843009) define <4 x i8> @f0(<4 x i8> %a0, <4 x i8> %a1) #0 { diff --git a/llvm/test/CodeGen/Hexagon/vect/bit4x8.ll b/llvm/test/CodeGen/Hexagon/vect/bit4x8.ll new file mode 100644 index 000000000000..2cc45f37150f --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/bit4x8.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: test_00 +; CHECK: r0 = and(r0,r1) +; CHECK-NEXT: jumpr r31 +define <4 x i8> @test_00(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = and <4 x i8> %a0, %a1 + ret <4 x i8> %v0 +} + +; CHECK-LABEL: test_01 +; CHECK: r0 = or(r0,r1) +; CHECK-NEXT: jumpr r31 +define <4 x i8> @test_01(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = or <4 x i8> %a0, %a1 + ret <4 x i8> %v0 +} + +; CHECK-LABEL: test_02 +; CHECK: r0 = xor(r0,r1) +; CHECK-NEXT: jumpr r31 +define <4 x i8> @test_02(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = xor <4 x i8> %a0, %a1 + ret <4 x i8> %v0 +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/vect/setcc-not.ll b/llvm/test/CodeGen/Hexagon/vect/setcc-not.ll new file mode 100644 index 000000000000..486a36ecf8c9 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/setcc-not.ll @@ -0,0 +1,102 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: test_00 +; CHECK: [[P00:p[0-9]+]] = vcmpb.eq(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <8 x i8> @test_00(<8 x i8> %a0, <8 x i8> %a1) #0 { + %v0 = icmp ne <8 x i8> %a0, %a1 + %v1 = sext <8 x i1> %v0 to <8 x i8> + ret <8 x i8> %v1 +} + +; CHECK-LABEL: test_01 
+; CHECK: [[P00:p[0-9]+]] = vcmpb.gt(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <8 x i8> @test_01(<8 x i8> %a0, <8 x i8> %a1) #0 { + %v0 = icmp sle <8 x i8> %a0, %a1 + %v1 = sext <8 x i1> %v0 to <8 x i8> + ret <8 x i8> %v1 +} + +; CHECK-LABEL: test_02 +; CHECK: [[P00:p[0-9]+]] = vcmpb.gtu(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <8 x i8> @test_02(<8 x i8> %a0, <8 x i8> %a1) #0 { + %v0 = icmp ule <8 x i8> %a0, %a1 + %v1 = sext <8 x i1> %v0 to <8 x i8> + ret <8 x i8> %v1 +} + +; CHECK-LABEL: test_10 +; CHECK: [[P00:p[0-9]+]] = vcmph.eq(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <4 x i16> @test_10(<4 x i16> %a0, <4 x i16> %a1) #0 { + %v0 = icmp ne <4 x i16> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i16> + ret <4 x i16> %v1 +} + +; CHECK-LABEL: test_11 +; CHECK: [[P00:p[0-9]+]] = vcmph.gt(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <4 x i16> @test_11(<4 x i16> %a0, <4 x i16> %a1) #0 { + %v0 = icmp sle <4 x i16> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i16> + ret <4 x i16> %v1 +} + +; CHECK-LABEL: test_12 +; CHECK: [[P00:p[0-9]+]] = vcmph.gtu(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <4 x i16> @test_12(<4 x i16> %a0, <4 x i16> %a1) #0 { + %v0 = icmp ule <4 x i16> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i16> + ret <4 x i16> %v1 +} + +; CHECK-LABEL: test_20 +; CHECK: [[P00:p[0-9]+]] = vcmpw.eq(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <2 x i32> @test_20(<2 x i32> %a0, <2 x i32> %a1) #0 { + %v0 = icmp ne <2 x i32> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i32> + ret <2 x i32> %v1 +} + +; CHECK-LABEL: test_21 +; CHECK: [[P00:p[0-9]+]] = vcmpw.gt(r1:0,r3:2) +; 
CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <2 x i32> @test_21(<2 x i32> %a0, <2 x i32> %a1) #0 { + %v0 = icmp sle <2 x i32> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i32> + ret <2 x i32> %v1 +} + +; CHECK-LABEL: test_22 +; CHECK: [[P00:p[0-9]+]] = vcmpw.gtu(r1:0,r3:2) +; CHECK: [[P01:p[0-9]+]] = not([[P00]]) +; CHECK: r1:0 = mask([[P01]]) +; CHECK: jumpr r31 +define <2 x i32> @test_22(<2 x i32> %a0, <2 x i32> %a1) #0 { + %v0 = icmp ule <2 x i32> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i32> + ret <2 x i32> %v1 +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/Hexagon/vect/setcc-v32.ll b/llvm/test/CodeGen/Hexagon/vect/setcc-v32.ll new file mode 100644 index 000000000000..9baf9241e3f2 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vect/setcc-v32.ll @@ -0,0 +1,224 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: test_00 +; CHECK: [[L00:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R00:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P00:p[0-3]+]] = vcmph.eq([[L00]],[[R00]]) +; CHECK-NOT: not([[P00]]) +define <4 x i8> @test_00(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp eq <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_01 +; CHECK: [[L01:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R01:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P01:p[0-3]+]] = vcmph.eq([[L01]],[[R01]]) +; CHECK: not([[P01]]) +define <4 x i8> @test_01(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp ne <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_02 +; CHECK: [[L02:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R02:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P02:p[0-3]+]] = vcmph.gt([[R02]],[[L02]]) +; CHECK-NOT: not([[P02]]) +define <4 x i8> @test_02(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp slt <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_03 +; CHECK: [[L03:r[0-9:]+]] = vsxtbh(r0) +; CHECK: 
[[R03:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P03:p[0-3]+]] = vcmph.gt([[L03]],[[R03]]) +; CHECK: not([[P03]]) +define <4 x i8> @test_03(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp sle <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_04 +; CHECK: [[L04:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R04:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P04:p[0-3]+]] = vcmph.gt([[L04]],[[R04]]) +; CHECK-NOT: not([[P04]]) +define <4 x i8> @test_04(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp sgt <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_05 +; CHECK: [[L05:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R05:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P05:p[0-3]+]] = vcmph.gt([[R05]],[[L05]]) +; CHECK: not([[P05]]) +define <4 x i8> @test_05(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp sge <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_06 +; CHECK: [[L06:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R06:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P06:p[0-3]+]] = vcmph.gtu([[R06]],[[L06]]) +; CHECK-NOT: not([[P06]]) +define <4 x i8> @test_06(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp ult <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_07 +; CHECK: [[L07:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R07:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P07:p[0-3]+]] = vcmph.gtu([[L07]],[[R07]]) +; CHECK: not([[P07]]) +define <4 x i8> @test_07(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp ule <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_08 +; CHECK: [[L08:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R08:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P08:p[0-3]+]] = vcmph.gtu([[L08]],[[R08]]) +; CHECK-NOT: not([[P08]]) +define <4 x i8> @test_08(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp ugt <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + +; CHECK-LABEL: test_09 +; CHECK: 
[[L09:r[0-9:]+]] = vsxtbh(r0) +; CHECK: [[R09:r[0-9:]+]] = vsxtbh(r1) +; CHECK: [[P09:p[0-3]+]] = vcmph.gtu([[R09]],[[L09]]) +; CHECK: not([[P09]]) +define <4 x i8> @test_09(<4 x i8> %a0, <4 x i8> %a1) #0 { + %v0 = icmp uge <4 x i8> %a0, %a1 + %v1 = sext <4 x i1> %v0 to <4 x i8> + ret <4 x i8> %v1 +} + + +; CHECK-LABEL: test_10 +; CHECK: [[L10:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R10:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P10:p[0-3]+]] = vcmpw.eq([[L10]],[[R10]]) +; CHECK-NOT: not([[P10]]) +define <2 x i16> @test_10(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp eq <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_11 +; CHECK: [[L11:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R11:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P11:p[0-3]+]] = vcmpw.eq([[L11]],[[R11]]) +; CHECK: not([[P11]]) +define <2 x i16> @test_11(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp ne <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_12 +; CHECK: [[L12:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R12:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P12:p[0-3]+]] = vcmpw.gt([[R12]],[[L12]]) +; CHECK-NOT: not([[P12]]) +define <2 x i16> @test_12(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp slt <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_13 +; CHECK: [[L13:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R13:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P13:p[0-3]+]] = vcmpw.gt([[L13]],[[R13]]) +; CHECK: not([[P13]]) +define <2 x i16> @test_13(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp sle <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_14 +; CHECK: [[L14:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R14:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P14:p[0-3]+]] = vcmpw.gt([[L14]],[[R14]]) +; CHECK-NOT: not([[P14]]) +define <2 x i16> @test_14(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp sgt <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 
x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_15 +; CHECK: [[L15:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R15:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P15:p[0-3]+]] = vcmpw.gt([[R15]],[[L15]]) +; CHECK: not([[P15]]) +define <2 x i16> @test_15(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp sge <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_16 +; CHECK: [[L16:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R16:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P16:p[0-3]+]] = vcmpw.gtu([[R16]],[[L16]]) +; CHECK-NOT: not([[P16]]) +define <2 x i16> @test_16(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp ult <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_17 +; CHECK: [[L17:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R17:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P17:p[0-3]+]] = vcmpw.gtu([[L17]],[[R17]]) +; CHECK: not([[P17]]) +define <2 x i16> @test_17(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp ule <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_18 +; CHECK: [[L18:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R18:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P18:p[0-3]+]] = vcmpw.gtu([[L18]],[[R18]]) +; CHECK-NOT: not([[P18]]) +define <2 x i16> @test_18(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp ugt <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +; CHECK-LABEL: test_19 +; CHECK: [[L19:r[0-9:]+]] = vsxthw(r0) +; CHECK: [[R19:r[0-9:]+]] = vsxthw(r1) +; CHECK: [[P19:p[0-3]+]] = vcmpw.gtu([[R19]],[[L19]]) +; CHECK: not([[P19]]) +define <2 x i16> @test_19(<2 x i16> %a0, <2 x i16> %a1) #0 { + %v0 = icmp uge <2 x i16> %a0, %a1 + %v1 = sext <2 x i1> %v0 to <2 x i16> + ret <2 x i16> %v1 +} + +attributes #0 = { nounwind readnone }