[PowerPC] Improve BUILD_VECTOR of 4 i32s
Currently, for this node:

    vector int test(int a, int b, int c, int d) {
      return (vector int) { a, b, c, d };
    }

we get this on Power9:

    mtvsrdd 34, 5, 3
    mtvsrdd 35, 6, 4
    vmrgow 2, 3, 2

and this on Power8:

    mtvsrwz 0, 3
    mtvsrwz 1, 5
    mtvsrwz 2, 4
    mtvsrwz 3, 6
    xxmrghd 34, 1, 0
    xxmrghd 35, 3, 2
    vmrgow 2, 3, 2

This can be improved to this on LE Power9:

    rldimi 3, 4, 32, 0
    rldimi 5, 6, 32, 0
    mtvsrdd 34, 5, 3

and this on LE Power8:

    rldimi 3, 4, 32, 0
    rldimi 5, 6, 32, 0
    mtvsrd 34, 3
    mtvsrd 35, 5
    xxpermdi 34, 35, 34, 0

This patch updates the TD pattern to generate the optimized sequence for both Power8 and Power9 on LE and BE.

Differential Revision: https://reviews.llvm.org/D53494

llvm-svn: 345414
This commit is contained in:
parent
3cc0e935c4
commit
de20843f6f
|
@ -3873,10 +3873,11 @@ let AddedComplexity = 400 in {
|
|||
(COPY_TO_REGCLASS (MTVSRD $A), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
|
||||
(XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
|
||||
(XXPERMDI
|
||||
(COPY_TO_REGCLASS
|
||||
(MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
|
||||
(COPY_TO_REGCLASS
|
||||
(MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
|
||||
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
|
||||
}
|
||||
|
@ -3888,10 +3889,11 @@ let AddedComplexity = 400 in {
|
|||
(COPY_TO_REGCLASS (MTVSRD $B), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
|
||||
(XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
|
||||
(XXPERMDI
|
||||
(COPY_TO_REGCLASS
|
||||
(MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
|
||||
(COPY_TO_REGCLASS
|
||||
(MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
|
||||
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
|
||||
}
|
||||
|
@ -3944,10 +3946,9 @@ let AddedComplexity = 400 in {
|
|||
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW
|
||||
(v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)),
|
||||
(v4i32
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>;
|
||||
(MTVSRDD
|
||||
(RLDIMI AnyExts.B, AnyExts.A, 32, 0),
|
||||
(RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
|
||||
|
@ -3957,10 +3958,9 @@ let AddedComplexity = 400 in {
|
|||
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW
|
||||
(v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)),
|
||||
(v4i32
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>;
|
||||
(MTVSRDD
|
||||
(RLDIMI AnyExts.C, AnyExts.D, 32, 0),
|
||||
(RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
|
||||
}
|
||||
// P9 Altivec instructions that can be used to build vectors.
|
||||
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
|
||||
|
|
|
@ -838,28 +838,26 @@ entry:
|
|||
; P9LE-LABEL: fromRegsi
|
||||
; P8BE-LABEL: fromRegsi
|
||||
; P8LE-LABEL: fromRegsi
|
||||
; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5
|
||||
; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6
|
||||
; P9BE: vmrgow v2, [[REG1]], [[REG2]]
|
||||
; P9BE-DAG: rldimi r6, r5, 32, 0
|
||||
; P9BE-DAG: rldimi r4, r3, 32, 0
|
||||
; P9BE: mtvsrdd v2, r4, r6
|
||||
; P9BE: blr
|
||||
; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3
|
||||
; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4
|
||||
; P9LE: vmrgow v2, [[REG2]], [[REG1]]
|
||||
; P9LE-DAG: rldimi r3, r4, 32, 0
|
||||
; P9LE-DAG: rldimi r5, r6, 32, 0
|
||||
; P9LE: mtvsrdd v2, r5, r3
|
||||
; P9LE: blr
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
|
||||
; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]]
|
||||
; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]]
|
||||
; P8BE: vmrgow v2, [[REG5]], [[REG6]]
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
|
||||
; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]]
|
||||
; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]]
|
||||
; P8LE: vmrgow v2, [[REG6]], [[REG5]]
|
||||
; P8BE-DAG: rldimi r6, r5, 32, 0
|
||||
; P8BE-DAG: rldimi r4, r3, 32, 0
|
||||
; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6
|
||||
; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4
|
||||
; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
|
||||
; P8BE: blr
|
||||
; P8LE-DAG: rldimi r3, r4, 32, 0
|
||||
; P8LE-DAG: rldimi r5, r6, 32, 0
|
||||
; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3
|
||||
; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5
|
||||
; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
|
||||
; P8LE: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
|
@ -1065,38 +1063,34 @@ entry:
|
|||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: rldimi
|
||||
; P9BE: rldimi
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: vmrgow
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: rldimi
|
||||
; P9LE: rldimi
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: vmrgow
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: rldimi
|
||||
; P8BE: rldimi
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: vmrgow
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: rldimi
|
||||
; P8LE: rldimi
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: vmrgow
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
|
@ -1132,41 +1126,37 @@ entry:
|
|||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: rldimi
|
||||
; P9BE: rldimi
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: vmrgow
|
||||
; P9LE: sldi r4, r4, 2
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: rldimi
|
||||
; P9LE: rldimi
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: vmrgow
|
||||
; P8BE: sldi r4, r4, 2
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: rldimi
|
||||
; P8BE: rldimi
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: vmrgow
|
||||
; P8LE: sldi r4, r4, 2
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: rldimi
|
||||
; P8LE: rldimi
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: vmrgow
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
|
@ -1978,28 +1968,26 @@ entry:
|
|||
; P9LE-LABEL: fromRegsui
|
||||
; P8BE-LABEL: fromRegsui
|
||||
; P8LE-LABEL: fromRegsui
|
||||
; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5
|
||||
; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6
|
||||
; P9BE: vmrgow v2, [[REG1]], [[REG2]]
|
||||
; P9BE-DAG: rldimi r6, r5, 32, 0
|
||||
; P9BE-DAG: rldimi r4, r3, 32, 0
|
||||
; P9BE: mtvsrdd v2, r4, r6
|
||||
; P9BE: blr
|
||||
; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3
|
||||
; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4
|
||||
; P9LE: vmrgow v2, [[REG2]], [[REG1]]
|
||||
; P9LE-DAG: rldimi r3, r4, 32, 0
|
||||
; P9LE-DAG: rldimi r5, r6, 32, 0
|
||||
; P9LE: mtvsrdd v2, r5, r3
|
||||
; P9LE: blr
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
|
||||
; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
|
||||
; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]]
|
||||
; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]]
|
||||
; P8BE: vmrgow v2, [[REG5]], [[REG6]]
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
|
||||
; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
|
||||
; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]]
|
||||
; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]]
|
||||
; P8LE: vmrgow v2, [[REG6]], [[REG5]]
|
||||
; P8BE-DAG: rldimi r6, r5, 32, 0
|
||||
; P8BE-DAG: rldimi r4, r3, 32, 0
|
||||
; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6
|
||||
; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4
|
||||
; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
|
||||
; P8BE: blr
|
||||
; P8LE-DAG: rldimi r3, r4, 32, 0
|
||||
; P8LE-DAG: rldimi r5, r6, 32, 0
|
||||
; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3
|
||||
; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5
|
||||
; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
|
||||
; P8LE: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
|
@ -2207,38 +2195,34 @@ entry:
|
|||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: rldimi
|
||||
; P9BE: rldimi
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: vmrgow
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: rldimi
|
||||
; P9LE: rldimi
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: vmrgow
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: rldimi
|
||||
; P8BE: rldimi
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: vmrgow
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: rldimi
|
||||
; P8LE: rldimi
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: vmrgow
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
|
@ -2274,41 +2258,37 @@ entry:
|
|||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: lwz
|
||||
; P9BE: rldimi
|
||||
; P9BE: rldimi
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: mtvsrdd
|
||||
; P9BE: vmrgow
|
||||
; P9LE: sldi r4, r4, 2
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: lwz
|
||||
; P9LE: rldimi
|
||||
; P9LE: rldimi
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: mtvsrdd
|
||||
; P9LE: vmrgow
|
||||
; P8BE: sldi r4, r4, 2
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: lwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: mtvsrwz
|
||||
; P8BE: rldimi
|
||||
; P8BE: rldimi
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: mtvsrd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: xxmrghd
|
||||
; P8BE: vmrgow
|
||||
; P8LE: sldi r4, r4, 2
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: lwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: mtvsrwz
|
||||
; P8LE: rldimi
|
||||
; P8LE: rldimi
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: mtvsrd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: xxmrghd
|
||||
; P8LE: vmrgow
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
|
|
Loading…
Reference in New Issue