[PowerPC] Canonicalize applicable vector shift immediates as swaps
This patch corresponds to review: http://reviews.llvm.org/D21358. Vector shifts that have the same semantics as a vector swap are canonicalized as such, giving the swap removal optimization additional opportunities to remove unnecessary swaps. llvm-svn: 275168
This commit is contained in:
parent
acee568545
commit
eebbcb6d57
|
@ -1083,6 +1083,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
|
||||
case PPCISD::RFEBB: return "PPCISD::RFEBB";
|
||||
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
|
||||
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
|
||||
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
|
||||
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
|
||||
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
|
||||
|
@ -7356,6 +7357,14 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
|||
DAG.getConstant(SplatIdx, dl, MVT::i32));
|
||||
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
|
||||
}
|
||||
|
||||
// Left shifts of 8 bytes are actually swaps. Convert accordingly.
|
||||
if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
|
||||
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
|
||||
SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
|
||||
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (Subtarget.hasQPX()) {
|
||||
|
|
|
@ -311,6 +311,10 @@ namespace llvm {
|
|||
/// of outputs.
|
||||
XXSWAPD,
|
||||
|
||||
/// An SDNode for swaps that are not associated with any loads/stores
|
||||
/// and thereby have no chain.
|
||||
SWAP_NO_CHAIN,
|
||||
|
||||
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
|
||||
QVFPERM,
|
||||
|
||||
|
|
|
@ -71,6 +71,7 @@ def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
|
|||
def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
|
||||
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
|
||||
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
|
||||
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
|
||||
|
||||
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
|
||||
string asmstr, InstrItinClass itin, Intrinsic Int,
|
||||
|
@ -963,6 +964,7 @@ def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
|
|||
def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
|
||||
def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
|
||||
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
|
||||
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
|
||||
|
||||
// Selects.
|
||||
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
|
||||
|
@ -1318,7 +1320,7 @@ def VectorExtractions {
|
|||
(v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
|
||||
|
||||
// Word extraction
|
||||
dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64));
|
||||
dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
|
||||
dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
|
||||
dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
|
||||
(v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
|
||||
|
@ -1588,7 +1590,7 @@ let Predicates = [IsBigEndian, HasP8Vector] in {
|
|||
def : Pat<(f32 (vector_extract v4f32:$S, 1)),
|
||||
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
|
||||
def : Pat<(f32 (vector_extract v4f32:$S, 2)),
|
||||
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
|
||||
(f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
|
||||
def : Pat<(f32 (vector_extract v4f32:$S, 3)),
|
||||
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
|
||||
def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
|
||||
|
@ -1693,7 +1695,7 @@ let Predicates = [IsLittleEndian, HasP8Vector] in {
|
|||
def : Pat<(f32 (vector_extract v4f32:$S, 0)),
|
||||
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
|
||||
def : Pat<(f32 (vector_extract v4f32:$S, 1)),
|
||||
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
|
||||
(f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
|
||||
def : Pat<(f32 (vector_extract v4f32:$S, 2)),
|
||||
(f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
|
||||
def : Pat<(f32 (vector_extract v4f32:$S, 3)),
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -verify-machineinstrs < %s | FileCheck %s
|
||||
define <4 x i32> @test1(<4 x i32> %a) {
|
||||
entry:
|
||||
; CHECK-LABEL: test1
|
||||
; CHECK: xxswapd 34, 34
|
||||
%vecins6 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
|
||||
ret <4 x i32> %vecins6
|
||||
}
|
||||
|
||||
define <8 x i16> @test2(<8 x i16> %a) #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: test2
|
||||
; CHECK: xxswapd 34, 34
|
||||
%vecins14 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
|
||||
ret <8 x i16> %vecins14
|
||||
}
|
||||
|
||||
define <16 x i8> @test3(<16 x i8> %a) #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: test3
|
||||
; CHECK: xxswapd 34, 34
|
||||
%vecins30 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
ret <16 x i8> %vecins30
|
||||
}
|
|
@ -1102,7 +1102,7 @@ entry:
|
|||
; CHECK: mfvsrwz 3, [[SHL]]
|
||||
; CHECK: extsw 3, 3
|
||||
; CHECK-LE-LABEL: @getsi0
|
||||
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
|
||||
; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
|
||||
; CHECK-LE: mfvsrwz 3, [[SHL]]
|
||||
; CHECK-LE: extsw 3, 3
|
||||
}
|
||||
|
@ -1150,7 +1150,7 @@ entry:
|
|||
%vecext = extractelement <4 x i32> %0, i32 3
|
||||
ret i32 %vecext
|
||||
; CHECK-LABEL: @getsi3
|
||||
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
|
||||
; CHECK: xxswapd [[SHL:[0-9]+]], 34
|
||||
; CHECK: mfvsrwz 3, [[SHL]]
|
||||
; CHECK: extsw 3, 3
|
||||
; CHECK-LE-LABEL: @getsi3
|
||||
|
@ -1172,7 +1172,7 @@ entry:
|
|||
; CHECK: mfvsrwz 3, [[SHL]]
|
||||
; CHECK: clrldi 3, 3, 32
|
||||
; CHECK-LE-LABEL: @getui0
|
||||
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
|
||||
; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
|
||||
; CHECK-LE: mfvsrwz 3, [[SHL]]
|
||||
; CHECK-LE: clrldi 3, 3, 32
|
||||
}
|
||||
|
@ -1220,7 +1220,7 @@ entry:
|
|||
%vecext = extractelement <4 x i32> %0, i32 3
|
||||
ret i32 %vecext
|
||||
; CHECK-LABEL: @getui3
|
||||
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
|
||||
; CHECK: xxswapd [[SHL:[0-9]+]], 34
|
||||
; CHECK: mfvsrwz 3, [[SHL]]
|
||||
; CHECK: clrldi 3, 3, 32
|
||||
; CHECK-LE-LABEL: @getui3
|
||||
|
@ -1380,7 +1380,7 @@ entry:
|
|||
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
|
||||
; CHECK: xscvspdpn 1, [[SHL]]
|
||||
; CHECK-LE-LABEL: @getf1
|
||||
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
|
||||
; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
|
||||
; CHECK-LE: xscvspdpn 1, [[SHL]]
|
||||
}
|
||||
|
||||
|
@ -1393,7 +1393,7 @@ entry:
|
|||
%vecext = extractelement <4 x float> %0, i32 2
|
||||
ret float %vecext
|
||||
; CHECK-LABEL: @getf2
|
||||
; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
|
||||
; CHECK: xxswapd [[SHL:[0-9]+]], 34
|
||||
; CHECK: xscvspdpn 1, [[SHL]]
|
||||
; CHECK-LE-LABEL: @getf2
|
||||
; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
|
||||
|
|
|
@ -9,8 +9,8 @@ entry:
|
|||
ret <2 x i32> %strided.vec
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: vsldoi [[TGT:[0-9]+]], 2, 2, 8
|
||||
; CHECK: vmrghw 2, 2, [[TGT]]
|
||||
; CHECK: xxswapd 35, 34
|
||||
; CHECK: vmrghw 2, 2, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue