[PowerPC] Canonicalize applicable vector shift immediates as swaps

This patch corresponds to review:
http://reviews.llvm.org/D21358

Vector shifts that have the same semantics as a vector swap are canonicalized
as such to provide additional opportunities for swap removal optimization to
remove unnecessary swaps.

llvm-svn: 275168
This commit is contained in:
Nemanja Ivanovic 2016-07-12 12:16:27 +00:00
parent acee568545
commit eebbcb6d57
6 changed files with 53 additions and 11 deletions

View File

@ -1083,6 +1083,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
@ -7356,6 +7357,14 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
DAG.getConstant(SplatIdx, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
}
// Left shifts of 8 bytes are actually swaps. Convert accordingly.
if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
}
}
if (Subtarget.hasQPX()) {

View File

@ -311,6 +311,10 @@ namespace llvm {
/// of outputs.
XXSWAPD,
/// An SDNode for swaps that are not associated with any loads/stores
/// and thereby have no chain.
SWAP_NO_CHAIN,
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,

View File

@ -71,6 +71,7 @@ def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@ -963,6 +964,7 @@ def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
// Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
@ -1318,7 +1320,7 @@ def VectorExtractions {
(v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
// Word extraction
dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64));
dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
(v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
@ -1588,7 +1590,7 @@ let Predicates = [IsBigEndian, HasP8Vector] in {
def : Pat<(f32 (vector_extract v4f32:$S, 1)),
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
def : Pat<(f32 (vector_extract v4f32:$S, 2)),
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
(f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
def : Pat<(f32 (vector_extract v4f32:$S, 3)),
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
@ -1693,7 +1695,7 @@ let Predicates = [IsLittleEndian, HasP8Vector] in {
def : Pat<(f32 (vector_extract v4f32:$S, 0)),
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
def : Pat<(f32 (vector_extract v4f32:$S, 1)),
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
(f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
def : Pat<(f32 (vector_extract v4f32:$S, 2)),
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
def : Pat<(f32 (vector_extract v4f32:$S, 3)),

View File

@ -0,0 +1,27 @@
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; test1: single-input shuffle of <4 x i32> with mask <2, 3, 0, 1>, i.e. the
; two 2-element halves of %a exchange places. The FileCheck directives below
; expect this to be emitted as one xxswapd whose operands are both 34.
define <4 x i32> @test1(<4 x i32> %a) {
entry:
; CHECK-LABEL: test1
; CHECK: xxswapd 34, 34
%vecins6 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
ret <4 x i32> %vecins6
}
; test2: single-input shuffle of <8 x i16> with mask <4..7, 0..3>, i.e. the
; two 4-element halves of %a exchange places. The FileCheck directives below
; expect this to be emitted as one xxswapd whose operands are both 34.
; NOTE(review): this function references attribute group #0, but no
; definition of #0 is visible in this file - confirm whether that is intended.
define <8 x i16> @test2(<8 x i16> %a) #0 {
entry:
; CHECK-LABEL: test2
; CHECK: xxswapd 34, 34
%vecins14 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i16> %vecins14
}
; test3: single-input shuffle of <16 x i8> with mask <8..15, 0..7>, i.e. the
; two 8-byte halves of %a exchange places. The FileCheck directives below
; expect this to be emitted as one xxswapd whose operands are both 34.
; NOTE(review): this function references attribute group #0, but no
; definition of #0 is visible in this file - confirm whether that is intended.
define <16 x i8> @test3(<16 x i8> %a) #0 {
entry:
; CHECK-LABEL: test3
; CHECK: xxswapd 34, 34
%vecins30 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x i8> %vecins30
}

View File

@ -1102,7 +1102,7 @@ entry:
; CHECK: mfvsrwz 3, [[SHL]]
; CHECK: extsw 3, 3
; CHECK-LE-LABEL: @getsi0
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
; CHECK-LE: mfvsrwz 3, [[SHL]]
; CHECK-LE: extsw 3, 3
}
@ -1150,7 +1150,7 @@ entry:
%vecext = extractelement <4 x i32> %0, i32 3
ret i32 %vecext
; CHECK-LABEL: @getsi3
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
; CHECK: xxswapd [[SHL:[0-9]+]], 34
; CHECK: mfvsrwz 3, [[SHL]]
; CHECK: extsw 3, 3
; CHECK-LE-LABEL: @getsi3
@ -1172,7 +1172,7 @@ entry:
; CHECK: mfvsrwz 3, [[SHL]]
; CHECK: clrldi 3, 3, 32
; CHECK-LE-LABEL: @getui0
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
; CHECK-LE: mfvsrwz 3, [[SHL]]
; CHECK-LE: clrldi 3, 3, 32
}
@ -1220,7 +1220,7 @@ entry:
%vecext = extractelement <4 x i32> %0, i32 3
ret i32 %vecext
; CHECK-LABEL: @getui3
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
; CHECK: xxswapd [[SHL:[0-9]+]], 34
; CHECK: mfvsrwz 3, [[SHL]]
; CHECK: clrldi 3, 3, 32
; CHECK-LE-LABEL: @getui3
@ -1380,7 +1380,7 @@ entry:
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
; CHECK: xscvspdpn 1, [[SHL]]
; CHECK-LE-LABEL: @getf1
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
; CHECK-LE: xscvspdpn 1, [[SHL]]
}
@ -1393,7 +1393,7 @@ entry:
%vecext = extractelement <4 x float> %0, i32 2
ret float %vecext
; CHECK-LABEL: @getf2
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
; CHECK: xxswapd [[SHL:[0-9]+]], 34
; CHECK: xscvspdpn 1, [[SHL]]
; CHECK-LE-LABEL: @getf2
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1

View File

@ -9,8 +9,8 @@ entry:
ret <2 x i32> %strided.vec
; CHECK-LABEL: @test1
; CHECK: vsldoi [[TGT:[0-9]+]], 2, 2, 8
; CHECK: vmrghw 2, 2, [[TGT]]
; CHECK: xxswapd 35, 34
; CHECK: vmrghw 2, 2, 3
; CHECK: blr
}