Rollup merge of #126555 - beetrees:f16-inline-asm-arm, r=Amanieu

Add `f16` inline ASM support for 32-bit ARM

Adds `f16` inline ASM support for 32-bit ARM. SIMD vector types are taken from [here](https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiesreturnbasetype=[float]&f:@navigationhierarchieselementbitsize=[16]&f:@navigationhierarchiesarchitectures=[A32]).

Relevant issue: #125398
Tracking issue: #116909

`@rustbot` label +F-f16_and_f128
This commit is contained in:
Guillaume Gomez 2024-06-22 12:57:18 +02:00 committed by GitHub
commit 07e8b3ac01
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 365 additions and 183 deletions

View File

@ -1037,6 +1037,19 @@ fn llvm_fixup_input<'ll, 'tcx>(
value value
} }
} }
(
InlineAsmRegClass::Arm(
ArmInlineAsmRegClass::dreg
| ArmInlineAsmRegClass::dreg_low8
| ArmInlineAsmRegClass::dreg_low16
| ArmInlineAsmRegClass::qreg
| ArmInlineAsmRegClass::qreg_low4
| ArmInlineAsmRegClass::qreg_low8,
),
Abi::Vector { element, count: count @ (4 | 8) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
}
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => { (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
match s.primitive() { match s.primitive() {
// MIPS only supports register-length arithmetics. // MIPS only supports register-length arithmetics.
@ -1158,6 +1171,19 @@ fn llvm_fixup_output<'ll, 'tcx>(
value value
} }
} }
(
InlineAsmRegClass::Arm(
ArmInlineAsmRegClass::dreg
| ArmInlineAsmRegClass::dreg_low8
| ArmInlineAsmRegClass::dreg_low16
| ArmInlineAsmRegClass::qreg
| ArmInlineAsmRegClass::qreg_low4
| ArmInlineAsmRegClass::qreg_low8,
),
Abi::Vector { element, count: count @ (4 | 8) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
}
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => { (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
match s.primitive() { match s.primitive() {
// MIPS only supports register-length arithmetics. // MIPS only supports register-length arithmetics.
@ -1270,6 +1296,19 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
layout.llvm_type(cx) layout.llvm_type(cx)
} }
} }
(
InlineAsmRegClass::Arm(
ArmInlineAsmRegClass::dreg
| ArmInlineAsmRegClass::dreg_low8
| ArmInlineAsmRegClass::dreg_low16
| ArmInlineAsmRegClass::qreg
| ArmInlineAsmRegClass::qreg_low4
| ArmInlineAsmRegClass::qreg_low8,
),
Abi::Vector { element, count: count @ (4 | 8) },
) if element.primitive() == Primitive::Float(Float::F16) => {
cx.type_vector(cx.type_i16(), count)
}
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => { (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
match s.primitive() { match s.primitive() {
// MIPS only supports register-length arithmetics. // MIPS only supports register-length arithmetics.

View File

@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass {
_arch: InlineAsmArch, _arch: InlineAsmArch,
) -> &'static [(InlineAsmType, Option<Symbol>)] { ) -> &'static [(InlineAsmType, Option<Symbol>)] {
match self { match self {
Self::reg => types! { _: I8, I16, I32, F32; }, Self::reg => types! { _: I8, I16, I32, F16, F32; },
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; }, Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
Self::dreg_low16 | Self::dreg_low8 => types! { Self::dreg_low16 | Self::dreg_low8 => types! {
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2); vfp2: I64, F64;
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
}, },
Self::dreg => types! { Self::dreg => types! {
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2); d32: I64, F64;
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
}, },
Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! { Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4); neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
}, },
} }
} }

View File

@ -1,10 +1,13 @@
//@ revisions: base d32 neon
//@ assembly-output: emit-asm //@ assembly-output: emit-asm
//@ compile-flags: --target armv7-unknown-linux-gnueabihf //@ compile-flags: --target armv7-unknown-linux-gnueabihf
//@ compile-flags: -C target-feature=+neon
//@ compile-flags: -C opt-level=0 //@ compile-flags: -C opt-level=0
//@[d32] compile-flags: -C target-feature=+d32
//@[neon] compile-flags: -C target-feature=+neon --cfg d32
//@[neon] filecheck-flags: --check-prefix d32
//@ needs-llvm-components: arm //@ needs-llvm-components: arm
#![feature(no_core, lang_items, rustc_attrs, repr_simd)] #![feature(no_core, lang_items, rustc_attrs, repr_simd, f16)]
#![crate_type = "rlib"] #![crate_type = "rlib"]
#![no_core] #![no_core]
#![allow(asm_sub_register, non_camel_case_types)] #![allow(asm_sub_register, non_camel_case_types)]
@ -38,6 +41,8 @@ pub struct i32x2(i32, i32);
#[repr(simd)] #[repr(simd)]
pub struct i64x1(i64); pub struct i64x1(i64);
#[repr(simd)] #[repr(simd)]
pub struct f16x4(f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x2(f32, f32); pub struct f32x2(f32, f32);
#[repr(simd)] #[repr(simd)]
pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8);
@ -48,11 +53,14 @@ pub struct i32x4(i32, i32, i32, i32);
#[repr(simd)] #[repr(simd)]
pub struct i64x2(i64, i64); pub struct i64x2(i64, i64);
#[repr(simd)] #[repr(simd)]
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x4(f32, f32, f32, f32); pub struct f32x4(f32, f32, f32, f32);
impl Copy for i8 {} impl Copy for i8 {}
impl Copy for i16 {} impl Copy for i16 {}
impl Copy for i32 {} impl Copy for i32 {}
impl Copy for f16 {}
impl Copy for f32 {} impl Copy for f32 {}
impl Copy for i64 {} impl Copy for i64 {}
impl Copy for f64 {} impl Copy for f64 {}
@ -61,11 +69,13 @@ impl Copy for i8x8 {}
impl Copy for i16x4 {} impl Copy for i16x4 {}
impl Copy for i32x2 {} impl Copy for i32x2 {}
impl Copy for i64x1 {} impl Copy for i64x1 {}
impl Copy for f16x4 {}
impl Copy for f32x2 {} impl Copy for f32x2 {}
impl Copy for i8x16 {} impl Copy for i8x16 {}
impl Copy for i16x8 {} impl Copy for i16x8 {}
impl Copy for i32x4 {} impl Copy for i32x4 {}
impl Copy for i64x2 {} impl Copy for i64x2 {}
impl Copy for f16x8 {}
impl Copy for f32x4 {} impl Copy for f32x4 {}
extern "C" { extern "C" {
@ -152,6 +162,12 @@ check!(reg_i16 i16 reg "mov");
// CHECK: @NO_APP // CHECK: @NO_APP
check!(reg_i32 i32 reg "mov"); check!(reg_i32 i32 reg "mov");
// CHECK-LABEL: reg_f16:
// CHECK: @APP
// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
// CHECK: @NO_APP
check!(reg_f16 f16 reg "mov");
// CHECK-LABEL: reg_f32: // CHECK-LABEL: reg_f32:
// CHECK: @APP // CHECK: @APP
// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}} // CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
@ -170,6 +186,12 @@ check!(reg_ptr ptr reg "mov");
// CHECK: @NO_APP // CHECK: @NO_APP
check!(sreg_i32 i32 sreg "vmov.f32"); check!(sreg_i32 i32 sreg "vmov.f32");
// CHECK-LABEL: sreg_f16:
// CHECK: @APP
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: @NO_APP
check!(sreg_f16 f16 sreg "vmov.f32");
// CHECK-LABEL: sreg_f32: // CHECK-LABEL: sreg_f32:
// CHECK: @APP // CHECK: @APP
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} // CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
@ -188,52 +210,72 @@ check!(sreg_ptr ptr sreg "vmov.f32");
// CHECK: @NO_APP // CHECK: @NO_APP
check!(sreg_low16_i32 i32 sreg_low16 "vmov.f32"); check!(sreg_low16_i32 i32 sreg_low16 "vmov.f32");
// CHECK-LABEL: sreg_low16_f16:
// CHECK: @APP
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: @NO_APP
check!(sreg_low16_f16 f16 sreg_low16 "vmov.f32");
// CHECK-LABEL: sreg_low16_f32: // CHECK-LABEL: sreg_low16_f32:
// CHECK: @APP // CHECK: @APP
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}} // CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: @NO_APP // CHECK: @NO_APP
check!(sreg_low16_f32 f32 sreg_low16 "vmov.f32"); check!(sreg_low16_f32 f32 sreg_low16 "vmov.f32");
// CHECK-LABEL: dreg_i64: // d32-LABEL: dreg_i64:
// CHECK: @APP // d32: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // d32: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // d32: @NO_APP
#[cfg(d32)]
check!(dreg_i64 i64 dreg "vmov.f64"); check!(dreg_i64 i64 dreg "vmov.f64");
// CHECK-LABEL: dreg_f64: // d32-LABEL: dreg_f64:
// CHECK: @APP // d32: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // d32: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // d32: @NO_APP
#[cfg(d32)]
check!(dreg_f64 f64 dreg "vmov.f64"); check!(dreg_f64 f64 dreg "vmov.f64");
// CHECK-LABEL: dreg_i8x8: // neon-LABEL: dreg_i8x8:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_i8x8 i8x8 dreg "vmov.f64"); check!(dreg_i8x8 i8x8 dreg "vmov.f64");
// CHECK-LABEL: dreg_i16x4: // neon-LABEL: dreg_i16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_i16x4 i16x4 dreg "vmov.f64"); check!(dreg_i16x4 i16x4 dreg "vmov.f64");
// CHECK-LABEL: dreg_i32x2: // neon-LABEL: dreg_i32x2:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_i32x2 i32x2 dreg "vmov.f64"); check!(dreg_i32x2 i32x2 dreg "vmov.f64");
// CHECK-LABEL: dreg_i64x1: // neon-LABEL: dreg_i64x1:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_i64x1 i64x1 dreg "vmov.f64"); check!(dreg_i64x1 i64x1 dreg "vmov.f64");
// CHECK-LABEL: dreg_f32x2: // neon-LABEL: dreg_f16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_f16x4 f16x4 dreg "vmov.f64");
// neon-LABEL: dreg_f32x2:
// neon: @APP
// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// neon: @NO_APP
#[cfg(neon)]
check!(dreg_f32x2 f32x2 dreg "vmov.f64"); check!(dreg_f32x2 f32x2 dreg "vmov.f64");
// CHECK-LABEL: dreg_low16_i64: // CHECK-LABEL: dreg_low16_i64:
@ -248,34 +290,46 @@ check!(dreg_low16_i64 i64 dreg_low16 "vmov.f64");
// CHECK: @NO_APP // CHECK: @NO_APP
check!(dreg_low16_f64 f64 dreg_low16 "vmov.f64"); check!(dreg_low16_f64 f64 dreg_low16 "vmov.f64");
// CHECK-LABEL: dreg_low16_i8x8: // neon-LABEL: dreg_low16_i8x8:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low16_i8x8 i8x8 dreg_low16 "vmov.f64"); check!(dreg_low16_i8x8 i8x8 dreg_low16 "vmov.f64");
// CHECK-LABEL: dreg_low16_i16x4: // neon-LABEL: dreg_low16_i16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low16_i16x4 i16x4 dreg_low16 "vmov.f64"); check!(dreg_low16_i16x4 i16x4 dreg_low16 "vmov.f64");
// CHECK-LABEL: dreg_low16_i32x2: // neon-LABEL: dreg_low16_i32x2:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low16_i32x2 i32x2 dreg_low16 "vmov.f64"); check!(dreg_low16_i32x2 i32x2 dreg_low16 "vmov.f64");
// CHECK-LABEL: dreg_low16_i64x1: // neon-LABEL: dreg_low16_i64x1:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low16_i64x1 i64x1 dreg_low16 "vmov.f64"); check!(dreg_low16_i64x1 i64x1 dreg_low16 "vmov.f64");
// CHECK-LABEL: dreg_low16_f32x2: // neon-LABEL: dreg_low16_f16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low16_f16x4 f16x4 dreg_low16 "vmov.f64");
// neon-LABEL: dreg_low16_f32x2:
// neon: @APP
// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// neon: @NO_APP
#[cfg(neon)]
check!(dreg_low16_f32x2 f32x2 dreg_low16 "vmov.f64"); check!(dreg_low16_f32x2 f32x2 dreg_low16 "vmov.f64");
// CHECK-LABEL: dreg_low8_i64: // CHECK-LABEL: dreg_low8_i64:
@ -290,124 +344,172 @@ check!(dreg_low8_i64 i64 dreg_low8 "vmov.f64");
// CHECK: @NO_APP // CHECK: @NO_APP
check!(dreg_low8_f64 f64 dreg_low8 "vmov.f64"); check!(dreg_low8_f64 f64 dreg_low8 "vmov.f64");
// CHECK-LABEL: dreg_low8_i8x8: // neon-LABEL: dreg_low8_i8x8:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low8_i8x8 i8x8 dreg_low8 "vmov.f64"); check!(dreg_low8_i8x8 i8x8 dreg_low8 "vmov.f64");
// CHECK-LABEL: dreg_low8_i16x4: // neon-LABEL: dreg_low8_i16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low8_i16x4 i16x4 dreg_low8 "vmov.f64"); check!(dreg_low8_i16x4 i16x4 dreg_low8 "vmov.f64");
// CHECK-LABEL: dreg_low8_i32x2: // neon-LABEL: dreg_low8_i32x2:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low8_i32x2 i32x2 dreg_low8 "vmov.f64"); check!(dreg_low8_i32x2 i32x2 dreg_low8 "vmov.f64");
// CHECK-LABEL: dreg_low8_i64x1: // neon-LABEL: dreg_low8_i64x1:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low8_i64x1 i64x1 dreg_low8 "vmov.f64"); check!(dreg_low8_i64x1 i64x1 dreg_low8 "vmov.f64");
// CHECK-LABEL: dreg_low8_f32x2: // neon-LABEL: dreg_low8_f16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}} // neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(dreg_low8_f16x4 f16x4 dreg_low8 "vmov.f64");
// neon-LABEL: dreg_low8_f32x2:
// neon: @APP
// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
// neon: @NO_APP
#[cfg(neon)]
check!(dreg_low8_f32x2 f32x2 dreg_low8 "vmov.f64"); check!(dreg_low8_f32x2 f32x2 dreg_low8 "vmov.f64");
// CHECK-LABEL: qreg_i8x16: // neon-LABEL: qreg_i8x16:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_i8x16 i8x16 qreg "vmov"); check!(qreg_i8x16 i8x16 qreg "vmov");
// CHECK-LABEL: qreg_i16x8: // neon-LABEL: qreg_i16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_i16x8 i16x8 qreg "vmov"); check!(qreg_i16x8 i16x8 qreg "vmov");
// CHECK-LABEL: qreg_i32x4: // neon-LABEL: qreg_i32x4:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_i32x4 i32x4 qreg "vmov"); check!(qreg_i32x4 i32x4 qreg "vmov");
// CHECK-LABEL: qreg_i64x2: // neon-LABEL: qreg_i64x2:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_i64x2 i64x2 qreg "vmov"); check!(qreg_i64x2 i64x2 qreg "vmov");
// CHECK-LABEL: qreg_f32x4: // neon-LABEL: qreg_f16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_f16x8 f16x8 qreg "vmov");
// neon-LABEL: qreg_f32x4:
// neon: @APP
// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// neon: @NO_APP
#[cfg(neon)]
check!(qreg_f32x4 f32x4 qreg "vmov"); check!(qreg_f32x4 f32x4 qreg "vmov");
// CHECK-LABEL: qreg_low8_i8x16: // neon-LABEL: qreg_low8_i8x16:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low8_i8x16 i8x16 qreg_low8 "vmov"); check!(qreg_low8_i8x16 i8x16 qreg_low8 "vmov");
// CHECK-LABEL: qreg_low8_i16x8: // neon-LABEL: qreg_low8_i16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low8_i16x8 i16x8 qreg_low8 "vmov"); check!(qreg_low8_i16x8 i16x8 qreg_low8 "vmov");
// CHECK-LABEL: qreg_low8_i32x4: // neon-LABEL: qreg_low8_i32x4:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low8_i32x4 i32x4 qreg_low8 "vmov"); check!(qreg_low8_i32x4 i32x4 qreg_low8 "vmov");
// CHECK-LABEL: qreg_low8_i64x2: // neon-LABEL: qreg_low8_i64x2:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low8_i64x2 i64x2 qreg_low8 "vmov"); check!(qreg_low8_i64x2 i64x2 qreg_low8 "vmov");
// CHECK-LABEL: qreg_low8_f32x4: // neon-LABEL: qreg_low8_f16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low8_f16x8 f16x8 qreg_low8 "vmov");
// neon-LABEL: qreg_low8_f32x4:
// neon: @APP
// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// neon: @NO_APP
#[cfg(neon)]
check!(qreg_low8_f32x4 f32x4 qreg_low8 "vmov"); check!(qreg_low8_f32x4 f32x4 qreg_low8 "vmov");
// CHECK-LABEL: qreg_low4_i8x16: // neon-LABEL: qreg_low4_i8x16:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low4_i8x16 i8x16 qreg_low4 "vmov"); check!(qreg_low4_i8x16 i8x16 qreg_low4 "vmov");
// CHECK-LABEL: qreg_low4_i16x8: // neon-LABEL: qreg_low4_i16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low4_i16x8 i16x8 qreg_low4 "vmov"); check!(qreg_low4_i16x8 i16x8 qreg_low4 "vmov");
// CHECK-LABEL: qreg_low4_i32x4: // neon-LABEL: qreg_low4_i32x4:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low4_i32x4 i32x4 qreg_low4 "vmov"); check!(qreg_low4_i32x4 i32x4 qreg_low4 "vmov");
// CHECK-LABEL: qreg_low4_i64x2: // neon-LABEL: qreg_low4_i64x2:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low4_i64x2 i64x2 qreg_low4 "vmov"); check!(qreg_low4_i64x2 i64x2 qreg_low4 "vmov");
// CHECK-LABEL: qreg_low4_f32x4: // neon-LABEL: qreg_low4_f16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} // neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check!(qreg_low4_f16x8 f16x8 qreg_low4 "vmov");
// neon-LABEL: qreg_low4_f32x4:
// neon: @APP
// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
// neon: @NO_APP
#[cfg(neon)]
check!(qreg_low4_f32x4 f32x4 qreg_low4 "vmov"); check!(qreg_low4_f32x4 f32x4 qreg_low4 "vmov");
// CHECK-LABEL: r0_i8: // CHECK-LABEL: r0_i8:
@ -428,6 +530,12 @@ check_reg!(r0_i16 i16 "r0" "mov");
// CHECK: @NO_APP // CHECK: @NO_APP
check_reg!(r0_i32 i32 "r0" "mov"); check_reg!(r0_i32 i32 "r0" "mov");
// CHECK-LABEL: r0_f16:
// CHECK: @APP
// CHECK: mov r0, r0
// CHECK: @NO_APP
check_reg!(r0_f16 f16 "r0" "mov");
// CHECK-LABEL: r0_f32: // CHECK-LABEL: r0_f32:
// CHECK: @APP // CHECK: @APP
// CHECK: mov r0, r0 // CHECK: mov r0, r0
@ -446,6 +554,12 @@ check_reg!(r0_ptr ptr "r0" "mov");
// CHECK: @NO_APP // CHECK: @NO_APP
check_reg!(s0_i32 i32 "s0" "vmov.f32"); check_reg!(s0_i32 i32 "s0" "vmov.f32");
// CHECK-LABEL: s0_f16:
// CHECK: @APP
// CHECK: vmov.f32 s0, s0
// CHECK: @NO_APP
check_reg!(s0_f16 f16 "s0" "vmov.f32");
// CHECK-LABEL: s0_f32: // CHECK-LABEL: s0_f32:
// CHECK: @APP // CHECK: @APP
// CHECK: vmov.f32 s0, s0 // CHECK: vmov.f32 s0, s0
@ -458,74 +572,101 @@ check_reg!(s0_f32 f32 "s0" "vmov.f32");
// CHECK: @NO_APP // CHECK: @NO_APP
check_reg!(s0_ptr ptr "s0" "vmov.f32"); check_reg!(s0_ptr ptr "s0" "vmov.f32");
// CHECK-LABEL: d0_i64: // FIXME(#126797): "d0" should work with `i64` and `f64` even when `d32` is disabled.
// CHECK: @APP // d32-LABEL: d0_i64:
// CHECK: vmov.f64 d0, d0 // d32: @APP
// CHECK: @NO_APP // d32: vmov.f64 d0, d0
// d32: @NO_APP
#[cfg(d32)]
check_reg!(d0_i64 i64 "d0" "vmov.f64"); check_reg!(d0_i64 i64 "d0" "vmov.f64");
// CHECK-LABEL: d0_f64: // d32-LABEL: d0_f64:
// CHECK: @APP // d32: @APP
// CHECK: vmov.f64 d0, d0 // d32: vmov.f64 d0, d0
// CHECK: @NO_APP // d32: @NO_APP
#[cfg(d32)]
check_reg!(d0_f64 f64 "d0" "vmov.f64"); check_reg!(d0_f64 f64 "d0" "vmov.f64");
// CHECK-LABEL: d0_i8x8: // neon-LABEL: d0_i8x8:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d0, d0 // neon: vmov.f64 d0, d0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(d0_i8x8 i8x8 "d0" "vmov.f64"); check_reg!(d0_i8x8 i8x8 "d0" "vmov.f64");
// CHECK-LABEL: d0_i16x4: // neon-LABEL: d0_i16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d0, d0 // neon: vmov.f64 d0, d0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(d0_i16x4 i16x4 "d0" "vmov.f64"); check_reg!(d0_i16x4 i16x4 "d0" "vmov.f64");
// CHECK-LABEL: d0_i32x2: // neon-LABEL: d0_i32x2:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d0, d0 // neon: vmov.f64 d0, d0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(d0_i32x2 i32x2 "d0" "vmov.f64"); check_reg!(d0_i32x2 i32x2 "d0" "vmov.f64");
// CHECK-LABEL: d0_i64x1: // neon-LABEL: d0_i64x1:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d0, d0 // neon: vmov.f64 d0, d0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(d0_i64x1 i64x1 "d0" "vmov.f64"); check_reg!(d0_i64x1 i64x1 "d0" "vmov.f64");
// CHECK-LABEL: d0_f32x2: // neon-LABEL: d0_f16x4:
// CHECK: @APP // neon: @APP
// CHECK: vmov.f64 d0, d0 // neon: vmov.f64 d0, d0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(d0_f16x4 f16x4 "d0" "vmov.f64");
// neon-LABEL: d0_f32x2:
// neon: @APP
// neon: vmov.f64 d0, d0
// neon: @NO_APP
#[cfg(neon)]
check_reg!(d0_f32x2 f32x2 "d0" "vmov.f64"); check_reg!(d0_f32x2 f32x2 "d0" "vmov.f64");
// CHECK-LABEL: q0_i8x16: // neon-LABEL: q0_i8x16:
// CHECK: @APP // neon: @APP
// CHECK: vorr q0, q0, q0 // neon: vorr q0, q0, q0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(q0_i8x16 i8x16 "q0" "vmov"); check_reg!(q0_i8x16 i8x16 "q0" "vmov");
// CHECK-LABEL: q0_i16x8: // neon-LABEL: q0_i16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q0, q0, q0 // neon: vorr q0, q0, q0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(q0_i16x8 i16x8 "q0" "vmov"); check_reg!(q0_i16x8 i16x8 "q0" "vmov");
// CHECK-LABEL: q0_i32x4: // neon-LABEL: q0_i32x4:
// CHECK: @APP // neon: @APP
// CHECK: vorr q0, q0, q0 // neon: vorr q0, q0, q0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(q0_i32x4 i32x4 "q0" "vmov"); check_reg!(q0_i32x4 i32x4 "q0" "vmov");
// CHECK-LABEL: q0_i64x2: // neon-LABEL: q0_i64x2:
// CHECK: @APP // neon: @APP
// CHECK: vorr q0, q0, q0 // neon: vorr q0, q0, q0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(q0_i64x2 i64x2 "q0" "vmov"); check_reg!(q0_i64x2 i64x2 "q0" "vmov");
// CHECK-LABEL: q0_f32x4: // neon-LABEL: q0_f16x8:
// CHECK: @APP // neon: @APP
// CHECK: vorr q0, q0, q0 // neon: vorr q0, q0, q0
// CHECK: @NO_APP // neon: @NO_APP
#[cfg(neon)]
check_reg!(q0_f16x8 f16x8 "q0" "vmov");
// neon-LABEL: q0_f32x4:
// neon: @APP
// neon: vorr q0, q0, q0
// neon: @NO_APP
#[cfg(neon)]
check_reg!(q0_f32x4 f32x4 "q0" "vmov"); check_reg!(q0_f32x4 f32x4 "q0" "vmov");