[AArch64] allow v4f16 types when FullFP16 is supported

Support for scalars was committed in r311154; this change adds support for
v4f16 vector types (thus avoiding conversions to/from single precision for
these types).

Differential Revision: https://reviews.llvm.org/D37145

llvm-svn: 312104
This commit is contained in:
Sjoerd Meijer 2017-08-30 08:38:13 +00:00
parent 767d98bad8
commit be5b60f735
3 changed files with 693 additions and 390 deletions

View File

@ -326,17 +326,27 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::v4f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
@ -361,53 +371,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
}
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
// Expand all other v4f16 operations.
// FIXME: We could generate better code by promoting some operations to
// a pair of v4f32s
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
setOperationAction(ISD::FREM, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
// v8f16 is also a storage-only type, so expand it.
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);

View File

@ -1,14 +1,30 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16
;;; Float vectors
%v2f32 = type <2 x float>
; CHECK: test_v2f32.sqrt:
%v4f16 = type <4 x half>
; CHECK-LABEL: test_v2f32.sqrt:
; Calls llvm.sqrt on a <2 x float> argument and returns the result; the check
; below expects the vector fsqrt.2s form in the generated assembly.
define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
; CHECK: fsqrt.2s
%1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; Calls llvm.sqrt on a <4 x half> argument and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; fsqrt instructions; the +fullfp16 run expects one fsqrt.4h immediately
; followed by ret.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.sqrt(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.sqrt:
; CHECK: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.sqrt:
; CHECK-FP16: fsqrt.4h
; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a)
ret %v4f16 %1
}
; CHECK: test_v2f32.powi:
define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
; CHECK: pow
@ -63,50 +79,142 @@ define %v2f32 @test_v2f32.log2(%v2f32 %a) {
%1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; CHECK: test_v2f32.fma:
; CHECK-LABEL: test_v2f32.fma:
; Calls llvm.fma on three <2 x float> arguments and returns the result; the
; checks below look for "fma" and for the vector fmla.2s form in the output.
define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
; CHECK: fma
; CHECK: fmla.2s
%1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c)
ret %v2f32 %1
}
; CHECK: test_v2f32.fabs:
; Calls llvm.fma on three <4 x half> arguments and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; fmadd instructions; the +fullfp16 run expects a vector fmla.4h.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
; CHECK-LABEL: test_v4f16.fma:
; CHECK: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.fma:
; CHECK-FP16: fmla.4h
%1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c)
ret %v4f16 %1
}
; CHECK-LABEL: test_v2f32.fabs:
; Calls llvm.fabs on a <2 x float> argument and returns the result; the checks
; below look for "fabs" and for the vector fabs.2s form in the output.
define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
; CHECK: fabs
; CHECK: fabs.2s
%1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; CHECK: test_v2f32.floor:
; Calls llvm.fabs on a <4 x half> argument and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; fabs instructions; the +fullfp16 run expects one fabs.4h immediately
; followed by ret.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.fabs(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.fabs:
; CHECK: fabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: fabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.fabs:
; CHECK-FP16: fabs.4h
; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a)
ret %v4f16 %1
}
; CHECK-LABEL: test_v2f32.floor:
; Calls llvm.floor on a <2 x float> argument and returns the result; the check
; below expects the vector frintm.2s form in the generated assembly.
define %v2f32 @test_v2f32.floor(%v2f32 %a) {
; CHECK: frintm.2s
%1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; CHECK: test_v2f32.ceil:
; Calls llvm.floor on a <4 x half> argument and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; frintm instructions; the +fullfp16 run expects one frintm.4h immediately
; followed by ret.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.floor(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.floor:
; CHECK: frintm s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintm s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintm s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintm s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.floor:
; CHECK-FP16: frintm.4h
; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a)
ret %v4f16 %1
}
; CHECK-LABEL: test_v2f32.ceil:
; Calls llvm.ceil on a <2 x float> argument and returns the result; the check
; below expects the vector frintp.2s form in the generated assembly.
define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
; CHECK: frintp.2s
%1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; CHECK: test_v2f32.trunc:
; Calls llvm.ceil on a <4 x half> argument and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; frintp instructions; the +fullfp16 run expects one frintp.4h immediately
; followed by ret.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.ceil:
; CHECK: frintp s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintp s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintp s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintp s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.ceil:
; CHECK-FP16: frintp.4h
; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
ret %v4f16 %1
}
; CHECK-LABEL: test_v2f32.trunc:
; Calls llvm.trunc on a <2 x float> argument and returns the result; the check
; below expects the vector frintz.2s form in the generated assembly.
define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
; CHECK: frintz.2s
%1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; CHECK: test_v2f32.rint:
; Calls llvm.trunc on a <4 x half> argument and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; frintz instructions; the +fullfp16 run expects one frintz.4h immediately
; followed by ret.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.trunc(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.trunc:
; CHECK: frintz s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintz s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintz s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintz s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.trunc:
; CHECK-FP16: frintz.4h
; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a)
ret %v4f16 %1
}
; CHECK-LABEL: test_v2f32.rint:
; Calls llvm.rint on a <2 x float> argument and returns the result; the check
; below expects the vector frintx.2s form in the generated assembly.
define %v2f32 @test_v2f32.rint(%v2f32 %a) {
; CHECK: frintx.2s
%1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; CHECK: test_v2f32.nearbyint:
; Calls llvm.rint on a <4 x half> argument and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; frintx instructions; the +fullfp16 run expects one frintx.4h immediately
; followed by ret.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.rint(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.rint:
; CHECK: frintx s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintx s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintx s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frintx s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.rint:
; CHECK-FP16: frintx.4h
; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a)
ret %v4f16 %1
}
; CHECK-LABEL: test_v2f32.nearbyint:
; Calls llvm.nearbyint on a <2 x float> argument and returns the result; the
; check below expects the vector frinti.2s form in the generated assembly.
define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
; CHECK: frinti.2s
%1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
ret %v2f32 %1
}
; Calls llvm.nearbyint on a <4 x half> argument and returns the result.
; Runs using the default check prefix expect four scalar single-precision
; frinti instructions; the +fullfp16 run expects one frinti.4h immediately
; followed by ret.
; Register numbers are matched with [0-9]+ so that multi-digit registers
; (e.g. s10) match and non-numeric text is rejected.
define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) {
; CHECK-LABEL: test_v4f16.nearbyint:
; CHECK: frinti s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frinti s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frinti s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: frinti s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-FP16-LABEL: test_v4f16.nearbyint:
; CHECK-FP16: frinti.4h
; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
ret %v4f16 %1
}
declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0
declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
@ -116,13 +224,27 @@ declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
declare %v2f32 @llvm.log.v2f32(%v2f32) #0
declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0
declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
declare %v4f16 @llvm.floor.v4f16(%v4f16) #0
declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0
declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0
declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
;;;

File diff suppressed because it is too large Load Diff