[PowerPC] Remove redundant direct moves when extracting integers and converting to FP

This patch corresponds to review:
https://reviews.llvm.org/D21354

We use direct moves for extracting integer elements from vectors. We also use
direct moves when converting integers to FP. When these operations are chained,
we get a direct move out of a VSR followed by a direct move back into a VSR.
These are redundant - all we need to do is line up the element and convert.

llvm-svn: 275796
This commit is contained in:
Nemanja Ivanovic 2016-07-18 15:30:00 +00:00
parent a645433c5f
commit d3c284f645
2 changed files with 150 additions and 0 deletions

View File

@ -1029,6 +1029,28 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
// Select the int_ppc_vsx_xvrsqrtedp intrinsic applied to a v2f64 operand
// directly to the XVRSQRTEDP instruction.
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
(XVRSQRTEDP $A)>;
// Little-endian: fold "extract an i64 element of a v2i64 -> PPCmtvsra ->
// PPCfcfid/PPCfcfidu" into a single VSX scalar conversion, so the value never
// leaves the vector-scalar registers.
//
// The feature predicate of the enclosing scope is restated here because a
// nested `let Predicates` replaces the enclosing list instead of extending it;
// without HasVSX these patterns would be guarded by endianness only.
let Predicates = [HasVSX, IsLittleEndian] in {
// Element 0 (signed): line the element up with XXPERMDI $S, $S, 2, then
// convert.
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
// Element 1 (signed): no permute, only register-class copies before the
// conversion.
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)),
                                            VSFRC)))>;
// Element 0 (unsigned).
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
// Element 1 (unsigned).
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)),
                                            VSFRC)))>;
} // HasVSX, IsLittleEndian
// Big-endian: fold "extract an i64 element of a v2i64 -> PPCmtvsra ->
// PPCfcfid/PPCfcfidu" into a single VSX scalar conversion, so the value never
// leaves the vector-scalar registers.
//
// The feature predicate of the enclosing scope is restated here because a
// nested `let Predicates` replaces the enclosing list instead of extending it;
// without HasVSX these patterns would be guarded by endianness only.
let Predicates = [HasVSX, IsBigEndian] in {
// Element 0 (signed): no permute, only a register-class copy before the
// conversion.
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
// Element 1 (signed): line the element up with XXPERMDI $S, $S, 2, then
// convert.
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
// Element 0 (unsigned).
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
// Element 1 (unsigned).
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
} // HasVSX, IsBigEndian
} // AddedComplexity
} // HasVSX
@ -1235,6 +1257,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
// XSCVSPDPN: XX2-form instruction printed as "xscvspdpn $XT, $XB". It reads a
// vsrc operand ($XB) and writes a vssrc result ($XT), uses the IIC_VecFP
// itinerary, and has an empty selection-pattern list, so it is only chosen
// through separate patterns or explicit use.
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
// Little-endian: fold "extract an i64 element of a v2i64 -> PPCmtvsra ->
// PPCfcfids/PPCfcfidus" into a single VSX scalar conversion to single
// precision, so the value never leaves the vector-scalar registers.
//
// The feature predicate of the enclosing scope is restated here because a
// nested `let Predicates` replaces the enclosing list instead of extending it;
// without HasP8Vector these patterns would be guarded by endianness only.
let Predicates = [HasP8Vector, IsLittleEndian] in {
// Element 0 (signed): line the element up with XXPERMDI $S, $S, 2, then
// convert.
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
// Element 1 (signed): no permute, only register-class copies before the
// conversion.
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f32 (XSCVSXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)),
                                            VSFRC)))>;
// Element 0 (unsigned).
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
// Element 1 (unsigned).
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f32 (XSCVUXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)),
                                            VSFRC)))>;
} // HasP8Vector, IsLittleEndian
// Big-endian: fold "extract an i64 element of a v2i64 -> PPCmtvsra ->
// PPCfcfids/PPCfcfidus" into a single VSX scalar conversion to single
// precision, so the value never leaves the vector-scalar registers.
//
// The feature predicate of the enclosing scope is restated here because a
// nested `let Predicates` replaces the enclosing list instead of extending it;
// without HasP8Vector these patterns would be guarded by endianness only.
let Predicates = [HasP8Vector, IsBigEndian] in {
// Element 0 (signed): no permute, only a register-class copy before the
// conversion.
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
// Element 1 (signed): line the element up with XXPERMDI $S, $S, 2, then
// convert.
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
// Element 0 (unsigned).
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
// Element 1 (unsigned).
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
} // HasP8Vector, IsBigEndian
} // AddedComplexity = 400
} // HasP8Vector

View File

@ -0,0 +1,107 @@
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
; Signed i64 -> double conversion of vector element 0.
; Little-endian output is expected to swap doublewords before converting;
; big-endian output is expected to convert a plain register copy.
define double @test1(<2 x i64> %a) {
entry:
; CHECK-LABEL: test1
; CHECK: xxswapd [[SW:[0-9]+]], 34
; CHECK: xscvsxddp 1, [[SW]]
; CHECK-BE-LABEL: test1
; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
; CHECK-BE: xscvsxddp 1, [[CP]]
  %elt = extractelement <2 x i64> %a, i32 0
  %conv = sitofp i64 %elt to double
  ret double %conv
}
; Signed i64 -> double conversion of vector element 1.
; Little-endian output is expected to convert a plain register copy;
; big-endian output is expected to swap doublewords before converting.
define double @test2(<2 x i64> %a) {
entry:
; CHECK-LABEL: test2
; CHECK: xxlor [[CP:[0-9]+]], 34, 34
; CHECK: xscvsxddp 1, [[CP]]
; CHECK-BE-LABEL: test2
; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
; CHECK-BE: xscvsxddp 1, [[SW]]
  %elt = extractelement <2 x i64> %a, i32 1
  %conv = sitofp i64 %elt to double
  ret double %conv
}
; Signed i64 -> float conversion of vector element 0.
; Little-endian output is expected to swap doublewords before converting;
; big-endian output is expected to convert a plain register copy.
define float @test1f(<2 x i64> %a) {
entry:
; CHECK-LABEL: test1f
; CHECK: xxswapd [[SW:[0-9]+]], 34
; CHECK: xscvsxdsp 1, [[SW]]
; CHECK-BE-LABEL: test1f
; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
; CHECK-BE: xscvsxdsp 1, [[CP]]
  %elt = extractelement <2 x i64> %a, i32 0
  %conv = sitofp i64 %elt to float
  ret float %conv
}
; Signed i64 -> float conversion of vector element 1.
; Little-endian output is expected to convert a plain register copy;
; big-endian output is expected to swap doublewords before converting.
define float @test2f(<2 x i64> %a) {
entry:
; CHECK-LABEL: test2f
; CHECK: xxlor [[CP:[0-9]+]], 34, 34
; CHECK: xscvsxdsp 1, [[CP]]
; CHECK-BE-LABEL: test2f
; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
; CHECK-BE: xscvsxdsp 1, [[SW]]
  %elt = extractelement <2 x i64> %a, i32 1
  %conv = sitofp i64 %elt to float
  ret float %conv
}
; Unsigned i64 -> double conversion of vector element 0.
; Little-endian output is expected to swap doublewords before converting;
; big-endian output is expected to convert a plain register copy.
define double @test1u(<2 x i64> %a) {
entry:
; CHECK-LABEL: test1u
; CHECK: xxswapd [[SW:[0-9]+]], 34
; CHECK: xscvuxddp 1, [[SW]]
; CHECK-BE-LABEL: test1u
; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
; CHECK-BE: xscvuxddp 1, [[CP]]
  %elt = extractelement <2 x i64> %a, i32 0
  %conv = uitofp i64 %elt to double
  ret double %conv
}
; Unsigned i64 -> double conversion of vector element 1.
; Little-endian output is expected to convert a plain register copy;
; big-endian output is expected to swap doublewords before converting.
define double @test2u(<2 x i64> %a) {
entry:
; CHECK-LABEL: test2u
; CHECK: xxlor [[CP:[0-9]+]], 34, 34
; CHECK: xscvuxddp 1, [[CP]]
; CHECK-BE-LABEL: test2u
; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
; CHECK-BE: xscvuxddp 1, [[SW]]
  %elt = extractelement <2 x i64> %a, i32 1
  %conv = uitofp i64 %elt to double
  ret double %conv
}
; Unsigned i64 -> float conversion of vector element 0.
; Little-endian output is expected to swap doublewords before converting;
; big-endian output is expected to convert a plain register copy.
define float @test1fu(<2 x i64> %a) {
entry:
; CHECK-LABEL: test1fu
; CHECK: xxswapd [[SW:[0-9]+]], 34
; CHECK: xscvuxdsp 1, [[SW]]
; CHECK-BE-LABEL: test1fu
; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
; CHECK-BE: xscvuxdsp 1, [[CP]]
  %elt = extractelement <2 x i64> %a, i32 0
  %conv = uitofp i64 %elt to float
  ret float %conv
}
; Unsigned i64 -> float conversion of vector element 1.
; Little-endian output is expected to convert a plain register copy;
; big-endian output is expected to swap doublewords before converting.
define float @test2fu(<2 x i64> %a) {
entry:
; CHECK-LABEL: test2fu
; CHECK: xxlor [[CP:[0-9]+]], 34, 34
; CHECK: xscvuxdsp 1, [[CP]]
; CHECK-BE-LABEL: test2fu
; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
; CHECK-BE: xscvuxdsp 1, [[SW]]
  %elt = extractelement <2 x i64> %a, i32 1
  %conv = uitofp i64 %elt to float
  ret float %conv
}