[PowerPC] Implement BE VSX load/store builtins - llvm portion.
This patch implements all the overloads for vec_xl_be and vec_xst_be. On BE, they behave exactly the same as vec_xl and vec_xst, so they are simply implemented by defining a matching macro. On LE, they are implemented by defining new builtins and intrinsics. For int/float/long long/double, it is just a load (lxvw4x/lxvd2x) or store (stxvw4x/stxvd2x). For char/short, we also need some extra shuffling before or after calling the builtins to get the desired BE element order. For int128, simply call vec_xl or vec_xst. llvm-svn: 286967
This commit is contained in:
parent
3776e76201
commit
5f850cd1b1
|
@ -737,13 +737,20 @@ def int_ppc_vsx_lxvw4x :
|
|||
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
|
||||
// VSX doubleword load (lxvd2x): reads a v2f64 through the pointer operand.
// Read-only and touches only the pointed-to memory (IntrReadMem + IntrArgMemOnly).
def int_ppc_vsx_lxvd2x :
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
|
||||
// Big-endian-order VSX word load: reads a v4i32 through the pointer operand.
// Per the commit message, this exists so LE targets can load elements in BE
// order (vec_xl_be); on BE it is not needed. Read-only, argument memory only.
def int_ppc_vsx_lxvw4x_be :
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
|
||||
// Big-endian-order VSX doubleword load: reads a v2f64 through the pointer
// operand; the BE-order counterpart of int_ppc_vsx_lxvd2x for LE targets
// (vec_xl_be). Read-only, argument memory only.
def int_ppc_vsx_lxvd2x_be :
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
|
||||
|
||||
// Vector store.
|
||||
// VSX word store (stxvw4x): writes a v4i32 through the pointer operand.
// Writes only the pointed-to memory (IntrArgMemOnly, no IntrReadMem).
def int_ppc_vsx_stxvw4x :
|
||||
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
|
||||
// VSX doubleword store (stxvd2x): writes a v2f64 through the pointer operand.
// Writes only the pointed-to memory (IntrArgMemOnly).
def int_ppc_vsx_stxvd2x :
|
||||
Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
|
||||
|
||||
// Big-endian-order VSX word store: writes a v4i32 through the pointer
// operand; the BE-order counterpart of int_ppc_vsx_stxvw4x for LE targets
// (vec_xst_be). Writes only the pointed-to memory.
def int_ppc_vsx_stxvw4x_be :
|
||||
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
|
||||
// Big-endian-order VSX doubleword store: writes a v2f64 through the pointer
// operand; the BE-order counterpart of int_ppc_vsx_stxvd2x for LE targets
// (vec_xst_be). Writes only the pointed-to memory.
def int_ppc_vsx_stxvd2x_be :
|
||||
Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
|
||||
// Vector and scalar maximum.
|
||||
// xvmaxdp: vector double-precision maximum. Operand/result types come from
// the PowerPC_VSX_Vec_DDD_Intrinsic multiclass (defined elsewhere in this
// file; presumably v2f64 x v2f64 -> v2f64 — confirm against the multiclass).
def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
|
||||
// xvmaxsp: vector single-precision maximum. Types come from the
// PowerPC_VSX_Vec_FFF_Intrinsic multiclass (defined elsewhere in this file;
// presumably v4f32 operands — confirm against the multiclass).
def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
|
||||
|
|
|
@ -9784,9 +9784,11 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
|
|||
case Intrinsic::ppc_altivec_lvx:
|
||||
case Intrinsic::ppc_altivec_lvxl:
|
||||
case Intrinsic::ppc_vsx_lxvw4x:
|
||||
case Intrinsic::ppc_vsx_lxvw4x_be:
|
||||
VT = MVT::v4i32;
|
||||
break;
|
||||
case Intrinsic::ppc_vsx_lxvd2x:
|
||||
case Intrinsic::ppc_vsx_lxvd2x_be:
|
||||
VT = MVT::v2f64;
|
||||
break;
|
||||
case Intrinsic::ppc_altivec_lvebx:
|
||||
|
@ -9833,6 +9835,12 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
|
|||
case Intrinsic::ppc_vsx_stxvd2x:
|
||||
VT = MVT::v2f64;
|
||||
break;
|
||||
case Intrinsic::ppc_vsx_stxvw4x_be:
|
||||
VT = MVT::v4i32;
|
||||
break;
|
||||
case Intrinsic::ppc_vsx_stxvd2x_be:
|
||||
VT = MVT::v2f64;
|
||||
break;
|
||||
case Intrinsic::ppc_altivec_stvebx:
|
||||
VT = MVT::i8;
|
||||
break;
|
||||
|
|
|
@ -1020,6 +1020,10 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
|
|||
(STXVD2X $rS, xoaddr:$dst)>;
|
||||
// Select the stxvw4x intrinsic store directly to the STXVW4X instruction.
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
|
||||
(STXVW4X $rS, xoaddr:$dst)>;
|
||||
// The BE-order store intrinsic also selects to the raw STXVD2X instruction
// in this predicate block (per the hunk header: HasVSX +
// HasOnlySwappingMemOps) — no extra swap is emitted here.
def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
|
||||
(STXVD2X $rS, xoaddr:$dst)>;
|
||||
// BE-order word store: select straight to STXVW4X, matching the
// stxvd2x_be pattern above it in this predicate block.
def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
|
||||
(STXVW4X $rS, xoaddr:$dst)>;
|
||||
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
|
||||
}
|
||||
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
|
||||
|
@ -1848,6 +1852,9 @@ let Predicates = [IsLittleEndian, HasVSX] in
|
|||
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
|
||||
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
|
||||
|
||||
// LE (per the hunk header's IsLittleEndian predicate): the BE-order word
// load intrinsic maps directly to the LXVW4X instruction.
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
|
||||
// LE: the BE-order doubleword load intrinsic maps directly to LXVD2X.
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
|
||||
|
||||
let Predicates = [IsLittleEndian, HasDirectMove] in {
|
||||
// v16i8 scalar <-> vector conversions (LE)
|
||||
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
|
||||
|
|
|
@ -1190,3 +1190,51 @@ entry:
|
|||
; CHECK-LE: xscmpudp [[REG:[0-9]+]], 3, 4
|
||||
; CHECK-LE: beqlr [[REG]]
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
; test83: lxvw4x.be intrinsic on a v4i32 load must select a plain lxvw4x,
; as pinned by the CHECK lines below.
define <4 x i32> @test83(i8* %a) {
|
||||
entry:
|
||||
%0 = tail call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %a)
|
||||
ret <4 x i32> %0
|
||||
; CHECK-LABEL: test83
|
||||
; CHECK: lxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8*)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
; test84: lxvd2x.be intrinsic on a v2f64 load must select a plain lxvd2x.
define <2 x double> @test84(i8* %a) {
|
||||
entry:
|
||||
%0 = tail call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %a)
|
||||
ret <2 x double> %0
|
||||
; CHECK-LABEL: test84
|
||||
; CHECK: lxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8*)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
; test85: stxvw4x.be intrinsic storing a v4i32 must select a plain stxvw4x.
define void @test85(<4 x i32> %a, i8* %b) {
|
||||
entry:
|
||||
tail call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %a, i8* %b)
|
||||
ret void
|
||||
; CHECK-LABEL: test85
|
||||
; CHECK: stxvw4x 34, 0, 5
|
||||
; CHECK: blr
|
||||
}
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.ppc.vsx.stxvw4x.be(<4 x i32>, i8*)
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
; test86: stxvd2x.be intrinsic storing a v2f64 must select a plain stxvd2x.
define void @test86(<2 x double> %a, i8* %b) {
|
||||
entry:
|
||||
tail call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %a, i8* %b)
|
||||
ret void
|
||||
; CHECK-LABEL: test86
|
||||
; CHECK: stxvd2x 34, 0, 5
|
||||
; CHECK: blr
|
||||
}
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.ppc.vsx.stxvd2x.be(<2 x double>, i8*)
|
||||
|
|
Loading…
Reference in New Issue