From 7d86e47d04a76362c2621f6aa2f82b83eb068347 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 21 Aug 2013 09:34:56 +0000 Subject: [PATCH] [SystemZ] Define remaining *MUL_LOHI patterns The initial port used MLG(R) for i64 UMUL_LOHI but left the other three combinations as not-legal-or-custom. Although 32x32->{32,32} multiplications exist, they're not as quick as doing a normal 64-bit multiplication, so it didn't seem like i32 SMUL_LOHI and UMUL_LOHI would be useful. There's also no direct instruction for i64 SMUL_LOHI, so it needs to be implemented in terms of UMUL_LOHI. However, not defining these patterns means that we don't convert division by a constant into multiplication, so this patch fills in the other cases. The new i64 SMUL_LOHI sequence is simpler than the one that we used previously for 64x64->128 multiplication, so int-mul-08.ll now tests the full sequence. llvm-svn: 188898 --- .../Target/SystemZ/SystemZISelLowering.cpp | 88 +++++++++++++++---- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 1 + llvm/test/CodeGen/SystemZ/int-div-06.ll | 56 ++++++++++++ llvm/test/CodeGen/SystemZ/int-mul-08.ll | 10 ++- 4 files changed, 136 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/int-div-06.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 7772b9ed443e..a1eecd736ce9 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -128,9 +128,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); - // Use *MUL_LOHI where possible and a wider multiplication otherwise. + // Use *MUL_LOHI where possible instead of MULH*. 
setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Custom); + setOperationAction(ISD::UMUL_LOHI, VT, Custom); // We have instructions for signed but not unsigned FP conversion. setOperationAction(ISD::FP_TO_UINT, VT, Expand); @@ -165,14 +167,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); - // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR), - // but they aren't really worth using. There is no 64-bit SMUL_LOHI, - // but there is a 64-bit UMUL_LOHI: MLGR. - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); - // FIXME: Can we support these natively? setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); @@ -1142,6 +1136,20 @@ static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, DL, MVT::Glue, CmpOp0, CmpOp1); } +// Implement a 32-bit *MUL_LOHI operation by extending both operands to +// 64 bits. Extend is the extension type to use. Store the high part +// in Hi and the low part in Lo. +static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, + unsigned Extend, SDValue Op0, SDValue Op1, + SDValue &Hi, SDValue &Lo) { + Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); + Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); + SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); + Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); + Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); +} + // Lower a binary operation that produces two VT results, one in each // half of a GR128 pair. 
Op0 and Op1 are the VT operands to the operation, // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation @@ -1427,18 +1435,64 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, 2, DL); } +SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue Ops[2]; + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else { + // Do a full 128-bit multiplication based on UMUL_LOHI64: + // + // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) + // + // but using the fact that the upper halves are either all zeros + // or all ones: + // + // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) + // + // and grouping the right terms together since they are quicker than the + // multiplication: + // + // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) + SDValue C63 = DAG.getConstant(63, MVT::i64); + SDValue LL = Op.getOperand(0); + SDValue RL = Op.getOperand(1); + SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); + SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. SMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. 
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + LL, RL, Ops[1], Ops[0]); + SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); + SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); + SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); + Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); + } + return DAG.getMergeValues(Ops, 2, DL); +} + SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); - assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI"); - - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. UMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + if (is32Bit(VT)) + // Just do a normal 64-bit multiplication and extract the results. + // We define this so that it can be used for constant division. + lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), + Op.getOperand(1), Ops[1], Ops[0]); + else + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. UMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. 
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, 2, DL); } @@ -1706,6 +1760,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerVACOPY(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::SMUL_LOHI: + return lowerSMUL_LOHI(Op, DAG); case ISD::UMUL_LOHI: return lowerUMUL_LOHI(Op, DAG); case ISD::SDIVREM: diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 3692e1e053b9..604453d2ddea 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -214,6 +214,7 @@ private: SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/SystemZ/int-div-06.ll b/llvm/test/CodeGen/SystemZ/int-div-06.ll new file mode 100644 index 000000000000..8576b1b6270a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-div-06.ll @@ -0,0 +1,56 @@ +; Test that divisions by constants are implemented as multiplications. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Check signed 32-bit division. +define i32 @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: lgfr [[REG:%r[0-5]]], %r2 +; CHECK: msgfi [[REG]], 502748801 +; CHECK-DAG: srlg [[RES1:%r[0-5]]], [[REG]], 63 +; CHECK-DAG: srag %r2, [[REG]], 46 +; CHECK: ar %r2, [[RES1]] +; CHECK: br %r14 + %b = sdiv i32 %a, 139968 + ret i32 %b +} + +; Check unsigned 32-bit division. 
+define i32 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: llgfr [[REG:%r[0-5]]], %r2 +; CHECK: msgfi [[REG]], 502748801 +; CHECK: srlg %r2, [[REG]], 46 +; CHECK: br %r14 + %b = udiv i32 %a, 139968 + ret i32 %b +} + +; Check signed 64-bit division. +define i64 @f3(i64 %dummy, i64 %a) { +; CHECK-LABEL: f3: +; CHECK-DAG: llihf [[CONST:%r[0-5]]], 1005497601 +; CHECK-DAG: oilf [[CONST]], 4251762321 +; CHECK-DAG: srag [[REG:%r[0-5]]], %r3, 63 +; CHECK-DAG: ngr [[REG]], [[CONST]] +; CHECK-DAG: mlgr %r2, [[CONST]] +; CHECK: sgr %r2, [[REG]] +; CHECK: srlg [[RES1:%r[0-5]]], %r2, 63 +; CHECK: srag %r2, %r2, 15 +; CHECK: agr %r2, [[RES1]] +; CHECK: br %r14 + %b = sdiv i64 %a, 139968 + ret i64 %b +} + +; Check unsigned 64-bit division. +define i64 @f4(i64 %dummy, i64 %a) { +; CHECK-LABEL: f4: +; CHECK: llihf [[CONST:%r[0-5]]], 1005497601 +; CHECK: oilf [[CONST]], 4251762321 +; CHECK: mlgr %r2, [[CONST]] +; CHECK: srlg %r2, %r2, 15 +; CHECK: br %r14 + %b = udiv i64 %a, 139968 + ret i64 %b +} diff --git a/llvm/test/CodeGen/SystemZ/int-mul-08.ll b/llvm/test/CodeGen/SystemZ/int-mul-08.ll index a245760e1809..90b26a4f3dde 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-08.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-08.ll @@ -22,9 +22,13 @@ define i64 @f1(i64 %dummy, i64 %a, i64 %b) { ; This needs a rather convoluted sequence. define i64 @f2(i64 %dummy, i64 %a, i64 %b) { ; CHECK-LABEL: f2: -; CHECK: mlgr -; CHECK: agr -; CHECK: agr +; CHECK-DAG: srag [[RES1:%r[0-5]]], %r3, 63 +; CHECK-DAG: srag [[RES2:%r[0-5]]], %r4, 63 +; CHECK-DAG: ngr [[RES1]], %r4 +; CHECK-DAG: ngr [[RES2]], %r3 +; CHECK-DAG: agr [[RES2]], [[RES1]] +; CHECK-DAG: mlgr %r2, %r4 +; CHECK: sgr %r2, [[RES2]] ; CHECK: br %r14 %ax = sext i64 %a to i128 %bx = sext i64 %b to i128