ARM: add a couple more NEON predicates.

The fused multiply instructions were added in VFPv4 but are still NEON instructions, in particular they shouldn't be available on a Cortex-M4 not matter how floaty it is. llvm-svn: 193342
2013-10-24 12:48:05 +00:00 · 2013-10-24 12:48:05 +00:00 · 225bcbbe71
parent 64dacb2b8a
commit 225bcbbe71
2 changed files with 16 additions and 5 deletions
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@ -4301,19 +4301,19 @@ def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
 def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                          v2f32, fmul_su, fadd_mlx>,
-                Requires<[HasVFP4,UseFusedMAC]>;
+                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

 def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                          v4f32, fmul_su, fadd_mlx>,
-                Requires<[HasVFP4,UseFusedMAC]>;
+                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

 //   Fused Vector Multiply Subtract (floating-point)
 def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                          v2f32, fmul_su, fsub_mlx>,
-                Requires<[HasVFP4,UseFusedMAC]>;
+                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
 def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                          v4f32, fmul_su, fsub_mlx>,
-                Requires<[HasVFP4,UseFusedMAC]>;
+                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

 // Match @llvm.fma.* intrinsics
 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
--- a/llvm/test/MC/ARM/vfp4.s
+++ b/llvm/test/MC/ARM/vfp4.s
@ -1,6 +1,8 @@
@ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4   | FileCheck %s --check-prefix=ARM
@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB
-@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 | FileCheck %s --check-prefix=THUMB_V7EM
+@ RUN: not llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 > %t 2> %t2
+@ RUN:     FileCheck %s < %t --check-prefix=THUMB_V7EM
+@ RUN:     FileCheck %s < %t2 --check-prefix=THUMB_V7EM-ERRORS

@ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
@ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
@ -13,10 +15,14 @@ vfma.f32 s2, s4, s0

@ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2]
@ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 d16, d18, d17
 vfma.f32 d16, d18, d17

@ ARM: vfma.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x08,0xf2]
@ THUMB: vfma.f32	q2, q4, q0 @ encoding: [0x08,0xef,0x50,0x4c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 q2, q4, q0
 vfma.f32 q2, q4, q0

@ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee]
@ -39,10 +45,14 @@ vfms.f32 s2, s4, s0

@ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2]
@ THUMB: vfms.f32 d16, d18, d17 @ encoding: [0x62,0xef,0xb1,0x0c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 d16, d18, d17
 vfms.f32 d16, d18, d17

@ ARM: vfms.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x28,0xf2]
@ THUMB: vfms.f32	q2, q4, q0 @ encoding: [0x28,0xef,0x50,0x4c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 q2, q4, q0
 vfms.f32 q2, q4, q0

@ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee]
@ -51,4 +61,5 @@ vfnms.f64 d16, d18, d17

@ ARM: vfnms.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0x92,0xee]
@ THUMB: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
+@ THUMB_V7EM: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
 vfnms.f32 s2, s4, s0