AArch64 NEON: add 64-bit scalar intrinsics for _f64 mla/mls etc.
These seem to be supported by GCC and make sense architecturally, so we should probably have them.

llvm-svn: 202138
This commit is contained in:
parent
87da936164
commit
3d4575cc1b
|
@ -623,10 +623,14 @@ def FMLA : SInst<"vfma", "dddd", "dQd">;
|
|||
def FMLS : SInst<"vfms", "dddd", "fdQfQd">;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// MUL, FMA, FMS definitions with scalar argument
|
||||
// MUL, MLA, MLS, FMA, FMS definitions with scalar argument
|
||||
def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
|
||||
def FMLA_N : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>;
|
||||
def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>;
|
||||
|
||||
def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>;
|
||||
def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>;
|
||||
|
||||
// Multiply-accumulate (vmla_n) and multiply-subtract (vmls_n) forms that take
// a scalar as the last operand, implemented via OP_MLA_N / OP_MLS_N.
// NOTE(review): the "Qd" type list presumably produces only the float64x2_t
// variants (vmlaq_n_f64 / vmlsq_n_f64) — confirm against the type-letter key
// for this file.
def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>;
|
||||
def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Logical operations
|
||||
|
|
|
@ -26,6 +26,15 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
|
|||
// CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
}
|
||||
|
||||
// Codegen test for vmlaq_n_f64 (a + b * c with a scalar c).
// The default-prefix checks expect an unfused sequence: fmul by element d[0]
// followed by fadd. The FMA-prefixed checks expect the scalar to be broadcast
// with dup and then consumed by a fused vector fmla.
// NOTE(review): which RUN line enables the FMA prefix is not visible in this hunk.
float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
|
||||
// CHECK-LABEL: test_vmlaq_n_f64
|
||||
return vmlaq_n_f64(a, b, c);
|
||||
// CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
|
||||
// CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
// CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
|
||||
// CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
}
|
||||
|
||||
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
|
||||
// CHECK-LABEL: test_vmlsq_n_f32
|
||||
return vmlsq_n_f32(a, b, c);
|
||||
|
@ -44,6 +53,15 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
|
|||
// CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
}
|
||||
|
||||
// Codegen test for vmlsq_n_f64 (a - b * c with a scalar c).
// The default-prefix checks expect an unfused sequence: fmul by element d[0]
// followed by fsub. The FMA-prefixed checks expect the scalar to be broadcast
// with dup and then consumed by a fused vector fmls.
// NOTE(review): which RUN line enables the FMA prefix is not visible in this hunk.
float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
|
||||
// CHECK-LABEL: test_vmlsq_n_f64
|
||||
return vmlsq_n_f64(a, b, c);
|
||||
// CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
|
||||
// CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
// CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
|
||||
// CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
}
|
||||
|
||||
float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
|
||||
// CHECK-LABEL: test_vmla_lane_f32_0
|
||||
return vmla_lane_f32(a, b, v, 0);
|
||||
|
@ -171,3 +189,14 @@ float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
|
|||
// CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
|
||||
}
|
||||
|
||||
// Codegen test for vfmaq_n_f64 (fused multiply-add with a scalar c).
// The check expects a single vector-form fmla whose three operands are all
// .2d registers, i.e. not the by-element (.d[N]) encoding.
float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
|
||||
// CHECK-LABEL: test_vfmaq_n_f64:
|
||||
return vfmaq_n_f64(a, b, c);
|
||||
// CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
}
|
||||
|
||||
// Codegen test for vfmsq_n_f64 (fused multiply-subtract with a scalar c).
// The check expects a single vector-form fmls whose three operands are all
// .2d registers, i.e. not the by-element (.d[N]) encoding.
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
|
||||
// CHECK-LABEL: test_vfmsq_n_f64:
|
||||
return vfmsq_n_f64(a, b, c);
|
||||
// CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue