From 14aa2ad1f466d54e275ba9f1327e79ef7c8c52ce Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Thu, 26 May 2016 19:41:33 +0000
Subject: [PATCH] [AArch64] Generate rev16/rev32 from bswap + srl when upper bits are known zero.

Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the high
16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32) to
(rotr (bswap i64 x), 32), if the high 32-bits of x are zero.

test_rev_w_srl16:            test_rev_w_srl16:
  and w8, w0, #0xffff          and w8, w0, #0xffff
  rev w8, w8           --->    rev16 w0, w8
  lsr w0, w8, #16

test_rev_x_srl32:            test_rev_x_srl32:
  rev x8, x8           --->    rev32 x0, x8
  lsr x0, x8, #32

llvm-svn: 270896
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 32 ++++++++++++++++++-
 llvm/test/CodeGen/AArch64/arm64-rev.ll        | 27 ++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2c8a4183f17d..cd02150c0a2e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -475,7 +475,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   // Also, try to fold ADD into CSINC/CSINV..
   setTargetDAGCombine(ISD::ADD);
   setTargetDAGCombine(ISD::SUB);
-
+  setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::XOR);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -8001,6 +8001,34 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
   return SDValue();
 }
 
+static SDValue performSRLCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
+  // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
+  // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() == ISD::BSWAP) {
+    SDLoc DL(N);
+    SDValue N1 = N->getOperand(1);
+    SDValue N00 = N0.getOperand(0);
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      uint64_t ShiftAmt = C->getZExtValue();
+      if (VT == MVT::i32 && ShiftAmt == 16 &&
+          DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
+        return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
+      if (VT == MVT::i64 && ShiftAmt == 32 &&
+          DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
+        return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
+    }
+  }
+  return SDValue();
+}
+
 static SDValue performBitcastCombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      SelectionDAG &DAG) {
@@ -9893,6 +9921,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performFDivCombine(N, DAG, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget);
+  case ISD::SRL:
+    return performSRLCombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN:
     return performIntrinsicCombine(N, DCI, Subtarget);
   case ISD::ANY_EXTEND:
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 74356d76d3c8..4980d7e3b275 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -16,6 +16,33 @@ entry:
   ret i64 %0
 }
 
+; Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 16-bits
+; of %a are zero. This optimizes rev + lsr 16 to rev16.
+define i32 @test_rev_w_srl16(i16 %a) {
+entry:
+; CHECK-LABEL: test_rev_w_srl16:
+; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
+; CHECK: rev16 w0, [[REG]]
+; CHECK-NOT: lsr
+  %0 = zext i16 %a to i32
+  %1 = tail call i32 @llvm.bswap.i32(i32 %0)
+  %2 = lshr i32 %1, 16
+  ret i32 %2
+}
+
+; Canonicalize (srl (bswap x), 32) to (rotr (bswap x), 32) if the high 32-bits
+; of %a are zero. This optimizes rev + lsr 32 to rev32.
+define i64 @test_rev_x_srl32(i32 %a) {
+entry:
+; CHECK-LABEL: test_rev_x_srl32:
+; CHECK: rev32 x0, {{x[0-9]+}}
+; CHECK-NOT: lsr
+  %0 = zext i32 %a to i64
+  %1 = tail call i64 @llvm.bswap.i64(i64 %0)
+  %2 = lshr i64 %1, 32
+  ret i64 %2
+}
+
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 declare i64 @llvm.bswap.i64(i64) nounwind readnone