[AArch64] Fixes for ARMv8.2-A FP16 scalar intrinsic - clang portion

https://reviews.llvm.org/D42993

llvm-svn: 324940
This commit is contained in:
Abderrazek Zaafrani 2018-02-12 21:26:06 +00:00
parent 88939fefe8
commit e7ed880761
2 changed files with 145 additions and 48 deletions

View File

@ -4160,45 +4160,28 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
// FP16 scalar intrinisics go here.
NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
NEONMAP1(vabsh_f16, aarch64_neon_abs, Add1ArgType),
NEONMAP1(vcageh_f16, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcagth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcaleh_f16, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcalth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_s16_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_u16_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_s16, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_u16, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_s16_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u16_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s16_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u16_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_s16_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_u16_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_s16_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_u16_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
@ -6188,6 +6171,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
// Handle non-overloaded intrinsics first.
switch (BuiltinID) {
default: break;
case NEON::BI__builtin_neon_vabsh_f16:
Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
case NEON::BI__builtin_neon_vldrq_p128: {
llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
@ -6282,6 +6268,101 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateFPToUI(Ops[0], Int64Ty);
return Builder.CreateFPToSI(Ops[0], Int64Ty);
}
case NEON::BI__builtin_neon_vcvtah_u16_f16:
case NEON::BI__builtin_neon_vcvtmh_u16_f16:
case NEON::BI__builtin_neon_vcvtnh_u16_f16:
case NEON::BI__builtin_neon_vcvtph_u16_f16:
case NEON::BI__builtin_neon_vcvtah_s16_f16:
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
case NEON::BI__builtin_neon_vcvtnh_s16_f16:
case NEON::BI__builtin_neon_vcvtph_s16_f16: {
unsigned Int;
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
Ops.push_back(EmitScalarExpr(E->getArg(0)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcvtah_u16_f16:
Int = Intrinsic::aarch64_neon_fcvtau; break;
case NEON::BI__builtin_neon_vcvtmh_u16_f16:
Int = Intrinsic::aarch64_neon_fcvtmu; break;
case NEON::BI__builtin_neon_vcvtnh_u16_f16:
Int = Intrinsic::aarch64_neon_fcvtnu; break;
case NEON::BI__builtin_neon_vcvtph_u16_f16:
Int = Intrinsic::aarch64_neon_fcvtpu; break;
case NEON::BI__builtin_neon_vcvtah_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtas; break;
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtms; break;
case NEON::BI__builtin_neon_vcvtnh_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtns; break;
case NEON::BI__builtin_neon_vcvtph_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtps; break;
}
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
return Builder.CreateTrunc(Ops[0], Int16Ty);
}
case NEON::BI__builtin_neon_vcaleh_f16:
case NEON::BI__builtin_neon_vcalth_f16:
case NEON::BI__builtin_neon_vcageh_f16:
case NEON::BI__builtin_neon_vcagth_f16: {
unsigned Int;
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
Ops.push_back(EmitScalarExpr(E->getArg(1)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcageh_f16:
Int = Intrinsic::aarch64_neon_facge; break;
case NEON::BI__builtin_neon_vcagth_f16:
Int = Intrinsic::aarch64_neon_facgt; break;
case NEON::BI__builtin_neon_vcaleh_f16:
Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
case NEON::BI__builtin_neon_vcalth_f16:
Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
}
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
return Builder.CreateTrunc(Ops[0], Int16Ty);
}
case NEON::BI__builtin_neon_vcvth_n_s16_f16:
case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
unsigned Int;
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
Ops.push_back(EmitScalarExpr(E->getArg(1)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcvth_n_s16_f16:
Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
case NEON::BI__builtin_neon_vcvth_n_u16_f16:
Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
}
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
return Builder.CreateTrunc(Ops[0], Int16Ty);
}
case NEON::BI__builtin_neon_vcvth_n_f16_s16:
case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
unsigned Int;
llvm::Type* FTy = HalfTy;
llvm::Type* InTy = Int32Ty;
llvm::Type *Tys[2] = {FTy, InTy};
Ops.push_back(EmitScalarExpr(E->getArg(1)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcvth_n_f16_s16:
Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
break;
case NEON::BI__builtin_neon_vcvth_n_f16_u16:
Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
Ops[0] = Builder.CreateZExt(Ops[0], InTy);
break;
}
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
}
case NEON::BI__builtin_neon_vpaddd_s64: {
llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
Value *Vec = EmitScalarExpr(E->getArg(0));

View File

@ -8,7 +8,7 @@
#include <arm_fp16.h>
// CHECK-LABEL: test_vabsh_f16
// CHECK: [[ABS:%.*]] = call half @llvm.aarch64.neon.abs.f16(half %a)
// CHECK: [[ABS:%.*]] = call half @llvm.fabs.f16(half %a)
// CHECK: ret half [[ABS]]
float16_t test_vabsh_f16(float16_t a) {
return vabsh_f16(a);
@ -139,8 +139,9 @@ uint64_t test_vcvth_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtah_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
int16_t test_vcvtah_s16_f16 (float16_t a) {
return vcvtah_s16_f16(a);
}
@ -160,8 +161,9 @@ int64_t test_vcvtah_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtah_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcvtah_u16_f16 (float16_t a) {
return vcvtah_u16_f16(a);
}
@ -181,8 +183,9 @@ uint64_t test_vcvtah_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtmh_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
int16_t test_vcvtmh_s16_f16 (float16_t a) {
return vcvtmh_s16_f16(a);
}
@ -202,8 +205,9 @@ int64_t test_vcvtmh_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtmh_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcvtmh_u16_f16 (float16_t a) {
return vcvtmh_u16_f16(a);
}
@ -223,8 +227,9 @@ uint64_t test_vcvtmh_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtnh_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
int16_t test_vcvtnh_s16_f16 (float16_t a) {
return vcvtnh_s16_f16(a);
}
@ -244,8 +249,9 @@ int64_t test_vcvtnh_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtnh_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcvtnh_u16_f16 (float16_t a) {
return vcvtnh_u16_f16(a);
}
@ -265,8 +271,9 @@ uint64_t test_vcvtnh_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtph_s16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
int16_t test_vcvtph_s16_f16 (float16_t a) {
return vcvtph_s16_f16(a);
}
@ -286,8 +293,9 @@ int64_t test_vcvtph_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtph_u16_f16
// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
// CHECK: ret i16 [[VCVT]]
// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcvtph_u16_f16 (float16_t a) {
return vcvtph_u16_f16(a);
}
@ -405,29 +413,33 @@ float16_t test_vabdh_f16(float16_t a, float16_t b) {
}
// CHECK-LABEL: test_vcageh_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facge.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
// CHECK: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %a, half %b)
// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcageh_f16(float16_t a, float16_t b) {
return vcageh_f16(a, b);
}
// CHECK-LABEL: test_vcagth_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facgt.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
// CHECK: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %a, half %b)
// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcagth_f16(float16_t a, float16_t b) {
return vcagth_f16(a, b);
}
// CHECK-LABEL: test_vcaleh_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facge.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
// CHECK: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %b, half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcaleh_f16(float16_t a, float16_t b) {
return vcaleh_f16(a, b);
}
// CHECK-LABEL: test_vcalth_f16
// CHECK: [[ABS:%.*]] = call i16 @llvm.aarch64.neon.facgt.i16.f16(half %a, half %b)
// CHECK: ret i16 [[ABS]]
// CHECK: [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %b, half %a)
// CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
// CHECK: ret i16 [[RET]]
uint16_t test_vcalth_f16(float16_t a, float16_t b) {
return vcalth_f16(a, b);
}
@ -473,7 +485,8 @@ uint16_t test_vclth_f16(float16_t a, float16_t b) {
}
// CHECK-LABEL: test_vcvth_n_f16_s16
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i16(i16 %a, i32 0)
// CHECK: [[SEXT:%.*]] = sext i16 %a to i32
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_s16(int16_t a) {
return vcvth_n_f16_s16(a, 0);
@ -494,8 +507,9 @@ float16_t test_vcvth_n_f16_s64(int64_t a) {
}
// CHECK-LABEL: test_vcvth_n_s16_f16
// CHECK: [[CVT:%.*]] = call i16 @llvm.aarch64.neon.vcvtfp2fxs.i16.f16(half %a, i32 0)
// CHECK: ret i16 [[CVT]]
// CHECK: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 0)
// CHECK: [[RET:%.*]] = trunc i32 [[CVT]] to i16
// CHECK: ret i16 [[RET]]
int16_t test_vcvth_n_s16_f16(float16_t a) {
return vcvth_n_s16_f16(a, 0);
}
@ -515,7 +529,8 @@ int64_t test_vcvth_n_s64_f16(float16_t a) {
}
// CHECK-LABEL: test_vcvth_n_f16_u16
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i16(i16 %a, i32 0)
// CHECK: [[SEXT:%.*]] = zext i16 %a to i32
// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 [[SEXT]], i32 0)
// CHECK: ret half [[CVT]]
float16_t test_vcvth_n_f16_u16(int16_t a) {
return vcvth_n_f16_u16(a, 0);
@ -536,8 +551,9 @@ float16_t test_vcvth_n_f16_u64(int64_t a) {
}
// CHECK-LABEL: test_vcvth_n_u16_f16
// CHECK: [[CVT:%.*]] = call i16 @llvm.aarch64.neon.vcvtfp2fxu.i16.f16(half %a, i32 0)
// CHECK: ret i16 [[CVT]]
// CHECK: [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 0)
// CHECK: [[RET:%.*]] = trunc i32 [[CVT]] to i16
// CHECK: ret i16 [[RET]]
int16_t test_vcvth_n_u16_f16(float16_t a) {
return vcvth_n_u16_f16(a, 0);
}