diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 4d2e0530c9d9..d015d915075d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -22,6 +22,39 @@ def int_aarch64_neon_vacgeq : def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +// Vector saturating accumulate +def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic; +def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic; + +// Vector Bitwise reverse +def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic; + +// Vector extract and narrow +def int_aarch64_neon_xtn : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; + +// Vector floating-point convert +def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic; +def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic; +def int_aarch64_neon_fcvtxn : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtns : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtnu : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtps : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtpu : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtms : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtmu : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtas : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; +def int_aarch64_neon_fcvtau : + Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; + // Vector maxNum (Floating Point) def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ab46d7f7b342..b437ce1b4573 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -338,6 +338,30 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v4f32, Custom); setOperationAction(ISD::SETCC, MVT::v1f64, Custom); setOperationAction(ISD::SETCC, MVT::v2f64, Custom); + + setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + + setOperationAction(ISD::FCEIL, MVT::v2f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + + setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + + setOperationAction(ISD::FRINT, MVT::v2f32, Legal); + setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v2f64, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + + setOperationAction(ISD::FROUND, MVT::v2f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); } } @@ -891,6 +915,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { return "AArch64ISD::NEON_VDUP"; case AArch64ISD::NEON_VDUPLANE: return "AArch64ISD::NEON_VDUPLANE"; + case AArch64ISD::NEON_REV16: + return "AArch64ISD::NEON_REV16"; + case AArch64ISD::NEON_REV32: + return "AArch64ISD::NEON_REV32"; + case AArch64ISD::NEON_REV64: + return "AArch64ISD::NEON_REV64"; case AArch64ISD::NEON_LD1_UPD: return "AArch64ISD::NEON_LD1_UPD"; case AArch64ISD::NEON_LD2_UPD: @@ -3797,6 +3827,36 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return SDValue(); } +/// isREVMask - Check if a vector shuffle corresponds to a REV +/// instruction with the specified blocksize. (The order of the elements +/// within each block of the vector is reversed.) +static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for REV are: 16, 32, 64"); + + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + unsigned BlockElts = M[0] + 1; + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSz; + + if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) + return false; + + for (unsigned i = 0; i < NumElts; ++i) { + if (M[i] < 0) + continue; // ignore UNDEF indices + if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) + return false; + } + + return true; +} + SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { @@ -3816,6 +3876,13 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (EltSize > 64) return SDValue(); + if (isREVMask(ShuffleMask, VT, 64)) + return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1); + if (isREVMask(ShuffleMask, VT, 32)) + return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1); + if (isREVMask(ShuffleMask, VT, 16)) + return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1); + // If the element of shuffle mask are all the same constant, we can // transform it into either NEON_VDUP or NEON_VDUPLANE if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 83fd79d6ba77..be55395a0d36 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -125,6 +125,11 @@ namespace AArch64ISD { // Vector FP move immediate NEON_FMOVIMM, + // Vector Element reverse + NEON_REV64, + NEON_REV32, + NEON_REV16, + // Vector compare NEON_CMP, diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td index 958d1a0549e4..6822f0ce277b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrNEON.td +++ b/llvm/lib/Target/AArch64/AArch64InstrNEON.td @@ -46,6 +46,10 @@ def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; +def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; +def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>; +def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>; +def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>; def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2, @@ -1610,6 +1614,21 @@ def Neon_low4f : PatFrag<(ops node:$in), (v2f32 (extract_subvector (v4f32 node:$in), (iPTR 0)))>; +def neon_uimm3_shift : Operand, + ImmLeaf { + let ParserMatchClass = uimm3_asmoperand; +} + +def neon_uimm4_shift : Operand, + ImmLeaf { + let ParserMatchClass = uimm4_asmoperand; +} + +def neon_uimm5_shift : Operand, + ImmLeaf { + let ParserMatchClass = uimm5_asmoperand; +} + class N2VShiftLong opcode, string asmop, string DestT, string SrcT, ValueType DestTy, ValueType SrcTy, Operand ImmTy, SDPatternOperator ExtOp> @@ -1619,7 +1638,7 @@ class N2VShiftLong opcode, string asmop, string DestT, [(set (DestTy VPR128:$Rd), (DestTy (shl (DestTy (ExtOp (SrcTy VPR64:$Rn))), - (DestTy (Neon_vdup (i32 imm:$Imm))))))], + (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], NoItinerary>; class N2VShiftLongHigh opcode, string asmop, string DestT, @@ -1633,40 +1652,40 @@ class N2VShiftLongHigh opcode, string asmop, string DestT, (DestTy (shl (DestTy (ExtOp (SrcTy (getTop VPR128:$Rn)))), - (DestTy (Neon_vdup (i32 imm:$Imm))))))], + (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], NoItinerary>; multiclass NeonI_N2VShLL opcode, string asmop, SDNode ExtOp> { // 64-bit vector types. def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, - uimm3, ExtOp> { + neon_uimm3_shift, ExtOp> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, - uimm4, ExtOp> { + neon_uimm4_shift, ExtOp> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, - uimm5, ExtOp> { + neon_uimm5_shift, ExtOp> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } // 128-bit vector types - def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", - v8i16, v8i8, 8, uimm3, ExtOp, Neon_High16B> { + def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8, + 8, neon_uimm3_shift, ExtOp, Neon_High16B> { let Inst{22-19} = 0b0001; // immh:immb = 0001xxx } - def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", - v4i32, v4i16, 4, uimm4, ExtOp, Neon_High8H> { + def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16, + 4, neon_uimm4_shift, ExtOp, Neon_High8H> { let Inst{22-20} = 0b001; // immh:immb = 001xxxx } - def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", - v2i64, v2i32, 2, uimm5, ExtOp, Neon_High4S> { + def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32, + 2, neon_uimm5_shift, ExtOp, Neon_High4S> { let Inst{22-21} = 0b01; // immh:immb = 01xxxxx } @@ -4693,25 +4712,25 @@ def neon_uimm0_bare : Operand, } def neon_uimm1_bare : Operand, - ImmLeaf { + ImmLeaf { let ParserMatchClass = neon_uimm1_asmoperand; let PrintMethod = "printUImmBareOperand"; } def neon_uimm2_bare : Operand, - ImmLeaf { + ImmLeaf { let ParserMatchClass = neon_uimm2_asmoperand; let PrintMethod = "printUImmBareOperand"; } def neon_uimm3_bare : Operand, - ImmLeaf { + ImmLeaf { let ParserMatchClass = uimm3_asmoperand; let PrintMethod = "printUImmBareOperand"; } def neon_uimm4_bare : Operand, - ImmLeaf { + ImmLeaf { let ParserMatchClass = uimm4_asmoperand; let PrintMethod = "printUImmBareOperand"; } @@ -5096,13 +5115,13 @@ def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; def neon_uimm3 : Operand, - ImmLeaf { + ImmLeaf { let ParserMatchClass = uimm3_asmoperand; let PrintMethod = "printUImmHexOperand"; } def neon_uimm4 : Operand, - ImmLeaf { + ImmLeaf { let ParserMatchClass = uimm4_asmoperand; let PrintMethod = "printUImmHexOperand"; } @@ -6538,6 +6557,855 @@ def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))), def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))), (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; +class NeonI_REV size, bit Q, bit U, + bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy, + SDPatternOperator Neon_Rev> + : NeonI_2VMisc ; + +def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128, + v16i8, Neon_rev64>; +def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128, + v8i16, Neon_rev64>; +def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128, + v4i32, Neon_rev64>; +def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64, + v8i8, Neon_rev64>; +def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64, + v4i16, Neon_rev64>; +def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64, + v2i32, Neon_rev64>; + +def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>; +def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>; + +def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128, + v16i8, Neon_rev32>; +def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128, + v8i16, Neon_rev32>; +def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64, + v8i8, Neon_rev32>; +def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64, + v4i16, Neon_rev32>; + +def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128, + v16i8, Neon_rev16>; +def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64, + v8i8, Neon_rev16>; + +multiclass NeonI_PairwiseAdd opcode, + SDPatternOperator Neon_Padd> { + def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.16b", + [(set (v8i16 VPR128:$Rd), + (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.8b", + [(set (v4i16 VPR64:$Rd), + (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.8h", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], + NoItinerary>; + + def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.4h", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.4s", + [(set (v2i64 VPR128:$Rd), + (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.1d, $Rn.2s", + [(set (v1i64 VPR64:$Rd), + (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))], + NoItinerary>; +} + +defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, + int_arm_neon_vpaddls>; +defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010, + int_arm_neon_vpaddlu>; + +multiclass NeonI_PairwiseAddAcc opcode, + SDPatternOperator Neon_Padd> { + let Constraints = "$src = $Rd" in { + def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.16b", + [(set (v8i16 VPR128:$Rd), + (v8i16 (Neon_Padd + (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.8b", + [(set (v4i16 VPR64:$Rd), + (v4i16 (Neon_Padd + (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.8h", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Padd + (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], + NoItinerary>; + + def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.4h", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Padd + (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.4s", + [(set (v2i64 VPR128:$Rd), + (v2i64 (Neon_Padd + (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.1d, $Rn.2s", + [(set (v1i64 VPR64:$Rd), + (v1i64 (Neon_Padd + (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))], + NoItinerary>; + } +} + +defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110, + int_arm_neon_vpadals>; +defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110, + int_arm_neon_vpadalu>; + +multiclass NeonI_2VMisc_BHSDsize_1Arg opcode> { + def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [], NoItinerary>; + + def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.8h", + [], NoItinerary>; + + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [], NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [], NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [], NoItinerary>; + + def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.4h", + [], NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [], NoItinerary>; +} + +defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; +defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>; +defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>; +defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>; + +multiclass NeonI_2VMisc_BHSD_1Arg_Pattern { + def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))), + (v16i8 (!cast(Prefix # 16b) (v16i8 VPR128:$Rn)))>; + + def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))), + (v8i16 (!cast(Prefix # 8h) (v8i16 VPR128:$Rn)))>; + + def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))), + (v4i32 (!cast(Prefix # 4s) (v4i32 VPR128:$Rn)))>; + + def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))), + (v2i64 (!cast(Prefix # 2d) (v2i64 VPR128:$Rn)))>; + + def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))), + (v8i8 (!cast(Prefix # 8b) (v8i8 VPR64:$Rn)))>; + + def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))), + (v4i16 (!cast(Prefix # 4h) (v4i16 VPR64:$Rn)))>; + + def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))), + (v2i32 (!cast(Prefix # 2s) (v2i32 VPR64:$Rn)))>; +} + +defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>; +defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>; +defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>; + +def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>; +def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>; + +def : Pat<(v16i8 (sub + (v16i8 Neon_AllZero), + (v16i8 VPR128:$Rn))), + (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (sub + (v8i8 Neon_AllZero), + (v8i8 VPR64:$Rn))), + (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>; +def : Pat<(v8i16 (sub + (v8i16 (bitconvert (v16i8 Neon_AllZero))), + (v8i16 VPR128:$Rn))), + (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>; +def : Pat<(v4i16 (sub + (v4i16 (bitconvert (v8i8 Neon_AllZero))), + (v4i16 VPR64:$Rn))), + (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>; +def : Pat<(v4i32 (sub + (v4i32 (bitconvert (v16i8 Neon_AllZero))), + (v4i32 VPR128:$Rn))), + (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>; +def : Pat<(v2i32 (sub + (v2i32 (bitconvert (v8i8 Neon_AllZero))), + (v2i32 VPR64:$Rn))), + (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>; +def : Pat<(v2i64 (sub + (v2i64 (bitconvert (v16i8 Neon_AllZero))), + (v2i64 VPR128:$Rn))), + (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>; + +multiclass NeonI_2VMisc_BHSDsize_2Args opcode> { + let Constraints = "$src = $Rd" in { + def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [], NoItinerary>; + + def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.8h", + [], NoItinerary>; + + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [], NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [], NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [], NoItinerary>; + + def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.4h", + [], NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [], NoItinerary>; + } +} + +defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>; +defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>; + +multiclass NeonI_2VMisc_BHSD_2Args_Pattern { + def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))), + (v16i8 (!cast(Prefix # 16b) + (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>; + + def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))), + (v8i16 (!cast(Prefix # 8h) + (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>; + + def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))), + (v4i32 (!cast(Prefix # 4s) + (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>; + + def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))), + (v2i64 (!cast(Prefix # 2d) + (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>; + + def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))), + (v8i8 (!cast(Prefix # 8b) + (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>; + + def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))), + (v4i16 (!cast(Prefix # 4h) + (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>; + + def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))), + (v2i32 (!cast(Prefix # 2s) + (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>; +} + +defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>; +defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>; + +multiclass NeonI_2VMisc_BHSsizes { + def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [(set (v16i8 VPR128:$Rd), + (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.8h", + [(set (v8i16 VPR128:$Rd), + (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], + NoItinerary>; + + def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [(set (v8i8 VPR64:$Rd), + (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.4h", + [(set (v4i16 VPR64:$Rd), + (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], + NoItinerary>; +} + +defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; +defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>; + +multiclass NeonI_2VMisc_Bsize size, + bits<5> Opcode> { + def 16b : NeonI_2VMisc<0b1, U, size, Opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [], NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, size, Opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [], NoItinerary>; +} + +defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; +defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>; +defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>; + +def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b", + (NOT16b VPR128:$Rd, VPR128:$Rn), 0>; +def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b", + (NOT8b VPR64:$Rd, VPR64:$Rn), 0>; + +def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))), + (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))), + (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>; + +def : Pat<(v16i8 (xor + (v16i8 VPR128:$Rn), + (v16i8 Neon_AllOne))), + (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (xor + (v8i8 VPR64:$Rn), + (v8i8 Neon_AllOne))), + (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>; +def : Pat<(v8i16 (xor + (v8i16 VPR128:$Rn), + (v8i16 (bitconvert (v16i8 Neon_AllOne))))), + (NOT16b VPR128:$Rn)>; +def : Pat<(v4i16 (xor + (v4i16 VPR64:$Rn), + (v4i16 (bitconvert (v8i8 Neon_AllOne))))), + (NOT8b VPR64:$Rn)>; +def : Pat<(v4i32 (xor + (v4i32 VPR128:$Rn), + (v4i32 (bitconvert (v16i8 Neon_AllOne))))), + (NOT16b VPR128:$Rn)>; +def : Pat<(v2i32 (xor + (v2i32 VPR64:$Rn), + (v2i32 (bitconvert (v8i8 Neon_AllOne))))), + (NOT8b VPR64:$Rn)>; +def : Pat<(v2i64 (xor + (v2i64 VPR128:$Rn), + (v2i64 (bitconvert (v16i8 Neon_AllOne))))), + (NOT16b VPR128:$Rn)>; + +def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))), + (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))), + (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>; + +multiclass NeonI_2VMisc_SDsizes opcode, + SDPatternOperator Neon_Op> { + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (v4f32 VPR128:$Rd), + (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], + NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [(set (v2f64 VPR128:$Rd), + (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (v2f32 VPR64:$Rd), + (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], + NoItinerary>; +} + +defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>; +defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>; + +multiclass NeonI_2VMisc_HSD_Narrow opcode> { + def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8b, $Rn.8h", + [], NoItinerary>; + + def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4h, $Rn.4s", + [], NoItinerary>; + + def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2s, $Rn.2d", + [], NoItinerary>; + + let Constraints = "$Rd = $src" in { + def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.16b, $Rn.8h", + [], NoItinerary>; + + def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.8h, $Rn.4s", + [], NoItinerary>; + + def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.2d", + [], NoItinerary>; + } +} + +defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>; +defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>; +defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>; +defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>; + +multiclass NeonI_2VMisc_Narrow_Patterns { + def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))), + (v8i8 (!cast(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>; + + def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))), + (v4i16 (!cast(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>; + + def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))), + (v2i32 (!cast(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>; + + def : Pat<(v16i8 (concat_vectors + (v8i8 VPR64:$src), + (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))), + (!cast(Prefix # 8h16b) + (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), + VPR128:$Rn)>; + + def : Pat<(v8i16 (concat_vectors + (v4i16 VPR64:$src), + (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))), + (!cast(Prefix # 4s8h) + (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), + VPR128:$Rn)>; + + def : Pat<(v4i32 (concat_vectors + (v2i32 VPR64:$src), + (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))), + (!cast(Prefix # 2d4s) + (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), + VPR128:$Rn)>; +} + +defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>; +defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>; +defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>; +defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>; + +multiclass NeonI_2VMisc_SHIFT opcode> { + def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR128:$Rd), + (ins VPR64:$Rn, uimm_exact8:$Imm), + asmop # "\t$Rd.8h, $Rn.8b, $Imm", + [], NoItinerary>; + + def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR128:$Rd), + (ins VPR64:$Rn, uimm_exact16:$Imm), + asmop # "\t$Rd.4s, $Rn.4h, $Imm", + [], NoItinerary>; + + def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR128:$Rd), + (ins VPR64:$Rn, uimm_exact32:$Imm), + asmop # "\t$Rd.2d, $Rn.2s, $Imm", + [], NoItinerary>; + + def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), + (ins VPR128:$Rn, uimm_exact8:$Imm), + asmop # "2\t$Rd.8h, $Rn.16b, $Imm", + [], NoItinerary>; + + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), + (ins VPR128:$Rn, uimm_exact16:$Imm), + asmop # "2\t$Rd.4s, $Rn.8h, $Imm", + [], NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), + (ins VPR128:$Rn, uimm_exact32:$Imm), + asmop # "2\t$Rd.2d, $Rn.4s, $Imm", + [], NoItinerary>; +} + +defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>; + +class NeonI_SHLL_Patterns + : Pat<(DesTy (shl + (DesTy (ExtOp (OpTy VPR64:$Rn))), + (DesTy (Neon_vdup + (i32 Neon_Imm:$Imm))))), + (!cast("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>; + +class NeonI_SHLL_High_Patterns + : Pat<(DesTy (shl + (DesTy (ExtOp + (OpTy (GetHigh VPR128:$Rn)))), + (DesTy (Neon_vdup + (i32 Neon_Imm:$Imm))))), + (!cast("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>; + +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; + +multiclass NeonI_2VMisc_SD_Narrow opcode> { + def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4h, $Rn.4s", + [], NoItinerary>; + + def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2s, $Rn.2d", + [], NoItinerary>; + + let Constraints = "$src = $Rd" in { + def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.8h, $Rn.4s", + [], NoItinerary>; + + def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.2d", + [], NoItinerary>; + } +} + +defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>; + +multiclass NeonI_2VMisc_Narrow_Pattern { + + def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))), + (!cast(prefix # "4s4h") (v4f32 VPR128:$Rn))>; + + def : Pat<(v8i16 (concat_vectors + (v4i16 VPR64:$src), + (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))), + (!cast(prefix # "4s8h") + (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), + (v4f32 VPR128:$Rn))>; + + def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))), + (!cast(prefix # "2d2s") (v2f64 VPR128:$Rn))>; + + def : Pat<(v4f32 (concat_vectors + (v2f32 VPR64:$src), + (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))), + (!cast(prefix # "2d4s") + (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), + (v2f64 VPR128:$Rn))>; +} + +defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>; + +multiclass NeonI_2VMisc_D_Narrow opcode> { + def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2s, $Rn.2d", + [], NoItinerary>; + + def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.2d", + [], NoItinerary> { + let Constraints = "$src = $Rd"; + } + + def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))), + (!cast(prefix # "2d2s") VPR128:$Rn)>; + + def : Pat<(v4f32 (concat_vectors + (v2f32 VPR64:$src), + (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))), + (!cast(prefix # "2d4s") + (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), + VPR128:$Rn)>; +} + +defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>; + +def Neon_High4Float : PatFrag<(ops node:$in), + (extract_subvector (v4f32 node:$in), (iPTR 2))>; + +multiclass NeonI_2VMisc_HS_Extend opcode> { + def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4s, $Rn.4h", + [], NoItinerary>; + + def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2d, $Rn.2s", + [], NoItinerary>; + + def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.8h", + [], NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "2\t$Rd.2d, $Rn.4s", + [], NoItinerary>; +} + +defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; + +multiclass NeonI_2VMisc_Extend_Pattern { + def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))), + (!cast(prefix # "4h4s") VPR64:$Rn)>; + + def : Pat<(v4f32 (int_arm_neon_vcvthf2fp + (v4i16 (Neon_High8H + (v8i16 VPR128:$Rn))))), + (!cast(prefix # "8h4s") VPR128:$Rn)>; + + def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))), + (!cast(prefix # "2s2d") VPR64:$Rn)>; + + def : Pat<(v2f64 (fextend + (v2f32 (Neon_High4Float + (v4f32 VPR128:$Rn))))), + (!cast(prefix # "4s2d") VPR128:$Rn)>; +} + +defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">; + +multiclass NeonI_2VMisc_SD_Conv opcode, + ValueType ResTy4s, ValueType OpTy4s, + ValueType ResTy2d, ValueType OpTy2d, + ValueType ResTy2s, ValueType OpTy2s, + SDPatternOperator Neon_Op> { + + def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (ResTy4s VPR128:$Rd), + (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))], + NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [(set (ResTy2d VPR128:$Rd), + (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (ResTy2s VPR64:$Rd), + (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))], + NoItinerary>; +} + +multiclass NeonI_2VMisc_fp_to_int opcode, SDPatternOperator Neon_Op> { + defm _ : NeonI_2VMisc_SD_Conv; +} + +defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010, + int_aarch64_neon_fcvtns>; +defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010, + int_aarch64_neon_fcvtnu>; +defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010, + int_aarch64_neon_fcvtps>; +defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010, + int_aarch64_neon_fcvtpu>; +defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011, + int_aarch64_neon_fcvtms>; +defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011, + int_aarch64_neon_fcvtmu>; +defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>; +defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>; +defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100, + int_aarch64_neon_fcvtas>; +defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100, + int_aarch64_neon_fcvtau>; + +multiclass NeonI_2VMisc_int_to_fp opcode, SDPatternOperator Neon_Op> { + defm _ : NeonI_2VMisc_SD_Conv; +} + +defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>; +defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>; + +multiclass NeonI_2VMisc_fp_to_fp opcode, SDPatternOperator Neon_Op> { + defm _ : NeonI_2VMisc_SD_Conv; +} + +defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000, + int_aarch64_neon_frintn>; +defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>; +defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>; +defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>; +defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>; +defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>; +defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>; +defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101, + int_arm_neon_vrecpe>; +defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, + int_arm_neon_vrsqrte>; +defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, + int_aarch64_neon_fsqrt>; + +multiclass NeonI_2VMisc_S_Conv opcode, SDPatternOperator Neon_Op> { + def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], + NoItinerary>; +} + +defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100, + int_arm_neon_vrecpe>; +defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100, + int_arm_neon_vrsqrte>; + // Crypto Class class NeonI_Cryptoaes_2v size, bits<5> opcode, string asmop, SDPatternOperator opnode> diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index 0848f9b03dd2..68f03425b276 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -51,8 +51,7 @@ define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) { define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) { ;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <8 x i8> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -60,8 +59,7 @@ define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) { define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) { ;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <16 x i8> %A, %B; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -69,8 +67,7 @@ define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) { define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) { ;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <4 x i16> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -78,8 +75,7 @@ define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) { define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) { ;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <8 x i16> %A, %B; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -87,8 +83,7 @@ define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) { define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) { ;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <2 x i32> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -96,8 +91,7 @@ define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) { define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) { ;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <4 x i32> %A, %B; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -105,8 +99,7 @@ define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) { define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) { ;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <2 x i64> %A, %B; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -867,8 +860,7 @@ define <2 x i64> @cmltz2xi64(<2 x i64> %A) { define <8 x i8> @cmneqz8xi8(<8 x i8> %A) { ;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <8 x i8> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -876,8 +868,7 @@ define <8 x i8> @cmneqz8xi8(<8 x i8> %A) { define <16 x i8> @cmneqz16xi8(<16 x i8> %A) { ;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <16 x i8> %A, zeroinitializer; %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -885,8 +876,7 @@ define <16 x i8> @cmneqz16xi8(<16 x i8> %A) { define <4 x i16> @cmneqz4xi16(<4 x i16> %A) { ;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <4 x i16> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -894,8 +884,7 @@ define <4 x i16> @cmneqz4xi16(<4 x i16> %A) { define <8 x i16> @cmneqz8xi16(<8 x i16> %A) { ;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <8 x i16> %A, zeroinitializer; %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -903,8 +892,7 @@ define <8 x i16> @cmneqz8xi16(<8 x i16> %A) { define <2 x i32> @cmneqz2xi32(<2 x i32> %A) { ;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = icmp ne <2 x i32> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -912,8 +900,7 @@ define <2 x i32> @cmneqz2xi32(<2 x i32> %A) { define <4 x i32> @cmneqz4xi32(<4 x i32> %A) { ;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <4 x i32> %A, zeroinitializer; %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -921,8 +908,7 @@ define <4 x i32> @cmneqz4xi32(<4 x i32> %A) { define <2 x i64> @cmneqz2xi64(<2 x i64> %A) { ;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = icmp ne <2 x i64> %A, zeroinitializer; %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1369,8 +1355,7 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) { ;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uno <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1382,8 +1367,7 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) { ;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1395,8 +1379,7 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) { ;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1408,8 +1391,7 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) { ;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ueq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1421,8 +1403,7 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) { ;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s ;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1434,8 +1415,7 @@ define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) { ;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d ;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1445,8 +1425,7 @@ define <2 x i32> @fcmuge2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGE = ULE with swapped operands, ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uge <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1456,8 +1435,7 @@ define <4 x i32> @fcmuge4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGE = ULE with swapped operands, ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1467,8 +1445,7 @@ define <2 x i64> @fcmuge2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGE = ULE with swapped operands, ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1478,8 +1455,7 @@ define <2 x i32> @fcmugt2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGT = ULT with swapped operands, ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ugt <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1489,16 +1465,14 @@ define <4 x i32> @fcmugt4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UGT = ULT with swapped operands, ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @fcmugt2xdouble(<2 x double> %A, <2 x double> %B) { ;CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1508,8 +1482,7 @@ define <2 x i32> @fcmule2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ule <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1519,8 +1492,7 @@ define <4 x i32> @fcmule4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1529,8 +1501,7 @@ define <2 x i64> @fcmule2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULE implemented as !OGT. ;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1540,8 +1511,7 @@ define <2 x i32> @fcmult2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ult <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1551,8 +1521,7 @@ define <4 x i32> @fcmult4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1561,8 +1530,7 @@ define <2 x i64> @fcmult2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; ULT implemented as !OGE. ;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1572,8 +1540,7 @@ define <2 x i32> @fcmune2xfloat(<2 x float> %A, <2 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UNE = !OEQ. ;CHECK: fcmeq {{v[0-9]+}}.2s, v0.2s, v1.2s -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp une <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1583,8 +1550,7 @@ define <4 x i32> @fcmune4xfloat(<4 x float> %A, <4 x float> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UNE = !OEQ. ;CHECK: fcmeq {{v[0-9]+}}.4s, v0.4s, v1.4s -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1593,8 +1559,7 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) { ; Using registers other than v0, v1 are possible, but would be odd. ; UNE = !OEQ. ;CHECK: fcmeq {{v[0-9]+}}.2d, v0.2d, v1.2d -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1766,8 +1731,7 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) { ;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1778,8 +1742,7 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) { ;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1790,8 +1753,7 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) { ;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1800,8 +1762,7 @@ define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) { define <2 x i32> @fcmugez2xfloat(<2 x float> %A) { ; UGE with zero = !OLT ;CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uge <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1810,8 +1771,7 @@ define <2 x i32> @fcmugez2xfloat(<2 x float> %A) { define <4 x i32> @fcmugez4xfloat(<4 x float> %A) { ; UGE with zero = !OLT ;CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1819,8 +1779,7 @@ define <4 x i32> @fcmugez4xfloat(<4 x float> %A) { define <2 x i64> @fcmugez2xdouble(<2 x double> %A) { ; UGE with zero = !OLT ;CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uge <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1829,8 +1788,7 @@ define <2 x i64> @fcmugez2xdouble(<2 x double> %A) { define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) { ; UGT with zero = !OLE ;CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ugt <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1839,8 +1797,7 @@ define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) { define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) { ; UGT with zero = !OLE ;CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1848,8 +1805,7 @@ define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) { define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) { ; UGT with zero = !OLE ;CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ugt <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1858,8 +1814,7 @@ define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) { define <2 x i32> @fcmultz2xfloat(<2 x float> %A) { ; ULT with zero = !OGE ;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ult <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1867,8 +1822,7 @@ define <2 x i32> @fcmultz2xfloat(<2 x float> %A) { define <4 x i32> @fcmultz4xfloat(<4 x float> %A) { ;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1876,8 +1830,7 @@ define <4 x i32> @fcmultz4xfloat(<4 x float> %A) { define <2 x i64> @fcmultz2xdouble(<2 x double> %A) { ;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ult <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1887,8 +1840,7 @@ define <2 x i64> @fcmultz2xdouble(<2 x double> %A) { define <2 x i32> @fcmulez2xfloat(<2 x float> %A) { ; ULE with zero = !OGT ;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp ule <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1897,8 +1849,7 @@ define <2 x i32> @fcmulez2xfloat(<2 x float> %A) { define <4 x i32> @fcmulez4xfloat(<4 x float> %A) { ; ULE with zero = !OGT ;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1907,8 +1858,7 @@ define <4 x i32> @fcmulez4xfloat(<4 x float> %A) { define <2 x i64> @fcmulez2xdouble(<2 x double> %A) { ; ULE with zero = !OGT ;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp ule <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1917,8 +1867,7 @@ define <2 x i64> @fcmulez2xdouble(<2 x double> %A) { define <2 x i32> @fcmunez2xfloat(<2 x float> %A) { ; UNE with zero = !OEQ with zero ;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp une <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1927,8 +1876,7 @@ define <2 x i32> @fcmunez2xfloat(<2 x float> %A) { define <4 x i32> @fcmunez4xfloat(<4 x float> %A) { ; UNE with zero = !OEQ with zero ;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1936,8 +1884,7 @@ define <4 x i32> @fcmunez4xfloat(<4 x float> %A) { define <2 x i64> @fcmunez2xdouble(<2 x double> %A) { ; UNE with zero = !OEQ with zero ;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp une <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -1949,8 +1896,7 @@ define <2 x i32> @fcmunoz2xfloat(<2 x float> %A) { ;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -;CHECK-NEXT: movi {{v[0-9]+}}.8b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = fcmp uno <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -1961,8 +1907,7 @@ define <4 x i32> @fcmunoz4xfloat(<4 x float> %A) { ;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -1973,8 +1918,7 @@ define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) { ;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0 ;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -;CHECK-NEXT: movi {{v[0-9]+}}.16b, #0xff -;CHECK-NEXT: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = fcmp uno <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 diff --git a/llvm/test/MC/AArch64/neon-diagnostics.s b/llvm/test/MC/AArch64/neon-diagnostics.s index 12d56a5fb090..044827e07043 100644 --- a/llvm/test/MC/AArch64/neon-diagnostics.s +++ b/llvm/test/MC/AArch64/neon-diagnostics.s @@ -5089,6 +5089,1040 @@ // CHECK-ERROR: ucvtf d21, s14, #64 // CHECK-ERROR: ^ +//------------------------------------------------------------------------------ +// Element reverse +//------------------------------------------------------------------------------ + rev64 v6.2d, v8.2d + rev32 v30.2s, v31.2s + rev32 v30.4s, v31.4s + rev32 v30.2d, v31.2d + rev16 v21.4h, v1.4h + rev16 v21.8h, v1.8h + rev16 v21.2s, v1.2s + rev16 v21.4s, v1.4s + rev16 v21.2d, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev64 v6.2d, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev32 v30.2s, v31.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev32 v30.4s, v31.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev32 v30.2d, v31.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.4h, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.8h, v1.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.2s, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.4s, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rev16 v21.2d, v1.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer pairwise add long +//------------------------------------------------------------------------------ + + saddlp v3.8h, v21.8h + saddlp v8.8b, v5.8b + saddlp v9.8h, v1.4s + saddlp v0.4s, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlp v3.8h, v21.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlp v8.8b, v5.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlp v9.8h, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: saddlp v0.4s, v1.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Unsigned integer pairwise add long +//------------------------------------------------------------------------------ + + uaddlp v3.8h, v21.8h + uaddlp v8.8b, v5.8b + uaddlp v9.8h, v1.4s + uaddlp v0.4s, v1.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v3.8h, v21.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v8.8b, v5.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v9.8h, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uaddlp v0.4s, v1.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer pairwise add and accumulate long +//------------------------------------------------------------------------------ + + sadalp v3.16b, v21.16b + sadalp v8.4h, v5.4h + sadalp v9.4s, v1.4s + sadalp v0.4h, v1.2s + sadalp v12.2d, v4.8h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v3.16b, v21.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v8.4h, v5.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v9.4s, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v0.4h, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sadalp v12.2d, v4.8h +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Unsigned integer pairwise add and accumulate long +//------------------------------------------------------------------------------ + + uadalp v3.16b, v21.16b + uadalp v8.4h, v5.4h + uadalp v9.4s, v1.4s + uadalp v0.4h, v1.2s + uadalp v12.2d, v4.8h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v3.16b, v21.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v8.4h, v5.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v9.4s, v1.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v0.4h, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uadalp v12.2d, v4.8h +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating accumulate of unsigned value +//------------------------------------------------------------------------------ + + suqadd v0.16b, v31.8b + suqadd v1.8b, v9.8h + suqadd v13.4h, v21.4s + suqadd v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Unsigned integer saturating accumulate of signed value +//------------------------------------------------------------------------------ + + usqadd v0.16b, v31.8b + usqadd v2.8h, v4.4h + usqadd v13.4h, v21.4s + usqadd v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer saturating absolute +//------------------------------------------------------------------------------ + + sqabs v0.16b, v31.8b + sqabs v2.8h, v4.4h + sqabs v6.4s, v8.2s + sqabs v6.2d, v8.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v6.4s, v8.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqabs v6.2d, v8.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating negate +//------------------------------------------------------------------------------ + + sqneg v0.16b, v31.8b + sqneg v2.8h, v4.4h + sqneg v6.4s, v8.2s + sqneg v6.2d, v8.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v6.4s, v8.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqneg v6.2d, v8.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer absolute +//------------------------------------------------------------------------------ + + abs v0.16b, v31.8b + abs v2.8h, v4.4h + abs v6.4s, v8.2s + abs v6.2d, v8.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v0.16b, v31.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v2.8h, v4.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v6.4s, v8.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: abs v6.2d, v8.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer count leading sign bits +//------------------------------------------------------------------------------ + + cls v0.2d, v31.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cls v0.2d, v31.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer count leading zeros +//------------------------------------------------------------------------------ + + clz v0.2d, v31.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: clz v0.2d, v31.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Population count +//------------------------------------------------------------------------------ + + cnt v2.8h, v4.8h + cnt v6.4s, v8.4s + cnt v6.2d, v8.2d + cnt v13.4h, v21.4h + cnt v4.2s, v0.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v6.4s, v8.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v6.2d, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: cnt v4.2s, v0.2s +// CHECK-ERROR: ^ + + +//------------------------------------------------------------------------------ +// Bitwise NOT +//------------------------------------------------------------------------------ + + not v2.8h, v4.8h + not v6.4s, v8.4s + not v6.2d, v8.2d + not v13.4h, v21.4h + not v4.2s, v0.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v6.4s, v8.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v6.2d, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: not v4.2s, v0.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Bitwise reverse +//------------------------------------------------------------------------------ + + rbit v2.8h, v4.8h + rbit v6.4s, v8.4s + rbit v6.2d, v8.2d + rbit v13.4h, v21.4h + rbit v4.2s, v0.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v6.4s, v8.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v6.2d, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: rbit v4.2s, v0.2s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point absolute +//------------------------------------------------------------------------------ + + fabs v0.16b, v31.16b + fabs v2.8h, v4.8h + fabs v1.8b, v9.8b + fabs v13.4h, v21.4h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fabs v13.4h, v21.4h +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point negate +//------------------------------------------------------------------------------ + + fneg v0.16b, v31.16b + fneg v2.8h, v4.8h + fneg v1.8b, v9.8b + fneg v13.4h, v21.4h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fneg v13.4h, v21.4h +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer extract and narrow +//------------------------------------------------------------------------------ + + xtn v0.16b, v31.8h + xtn v2.8h, v4.4s + xtn v6.4s, v8.2d + xtn2 v1.8b, v9.8h + xtn2 v13.4h, v21.4s + xtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: xtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating extract and unsigned narrow +//------------------------------------------------------------------------------ + + sqxtun v0.16b, v31.8h + sqxtun v2.8h, v4.4s + sqxtun v6.4s, v8.2d + sqxtun2 v1.8b, v9.8h + sqxtun2 v13.4h, v21.4s + sqxtun2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtun2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Signed integer saturating extract and narrow +//------------------------------------------------------------------------------ + + sqxtn v0.16b, v31.8h + sqxtn v2.8h, v4.4s + sqxtn v6.4s, v8.2d + sqxtn2 v1.8b, v9.8h + sqxtn2 v13.4h, v21.4s + sqxtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqxtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Unsigned integer saturating extract and narrow +//------------------------------------------------------------------------------ + + uqxtn v0.16b, v31.8h + uqxtn v2.8h, v4.4s + uqxtn v6.4s, v8.2d + uqxtn2 v1.8b, v9.8h + uqxtn2 v13.4h, v21.4s + uqxtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn v0.16b, v31.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn2 v1.8b, v9.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uqxtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Integer shift left long +//------------------------------------------------------------------------------ + + shll2 v2.8h, v4.16b, #7 + shll2 v6.4s, v8.8h, #15 + shll2 v6.2d, v8.4s, #31 + shll v2.8h, v4.16b, #8 + shll v6.4s, v8.8h, #16 + shll v6.2d, v8.4s, #32 + shll v2.8h, v4.8b, #8 + shll v6.4s, v8.4h, #16 + shll v6.2d, v8.2s, #32 + shll2 v2.8h, v4.8b, #5 + shll2 v6.4s, v8.4h, #14 + shll2 v6.2d, v8.2s, #1 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v2.8h, v4.16b, #7 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.4s, v8.8h, #15 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.2d, v8.4s, #31 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll v2.8h, v4.16b, #8 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll v6.4s, v8.8h, #16 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll v6.2d, v8.4s, #32 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v2.8h, v4.8b, #5 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.4s, v8.4h, #14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: shll2 v6.2d, v8.2s, #1 +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert downsize +//------------------------------------------------------------------------------ + + fcvtn v2.8h, v4.4s + fcvtn v6.4s, v8.2d + fcvtn2 v13.4h, v21.4s + fcvtn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn v2.8h, v4.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn2 v13.4h, v21.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert downsize with inexact +//------------------------------------------------------------------------------ + + fcvtxn v6.4s, v8.2d + fcvtxn2 v4.2s, v0.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtxn v6.4s, v8.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtxn2 v4.2s, v0.2d +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point convert upsize +//------------------------------------------------------------------------------ + + fcvtl2 v9.4s, v1.4h + fcvtl2 v0.2d, v1.2s + fcvtl v12.4s, v4.8h + fcvtl v17.2d, v28.4s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl2 v9.4s, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl2 v0.2d, v1.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl v12.4s, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtl v17.2d, v28.4s +// CHECK-ERROR: ^ + +//------------------------------------------------------------------------------ +// Floating-point round to integral +//------------------------------------------------------------------------------ + + frintn v0.16b, v31.16b + frintn v2.8h, v4.8h + frintn v1.8b, v9.8b + frintn v13.4h, v21.4h + + frinta v0.16b, v31.16b + frinta v2.8h, v4.8h + frinta v1.8b, v9.8b + frinta v13.4h, v21.4h + + frintp v0.16b, v31.16b + frintp v2.8h, v4.8h + frintp v1.8b, v9.8b + frintp v13.4h, v21.4h + + frintm v0.16b, v31.16b + frintm v2.8h, v4.8h + frintm v1.8b, v9.8b + frintm v13.4h, v21.4h + + frintx v0.16b, v31.16b + frintx v2.8h, v4.8h + frintx v1.8b, v9.8b + frintx v13.4h, v21.4h + + frintz v0.16b, v31.16b + frintz v2.8h, v4.8h + frintz v1.8b, v9.8b + frintz v13.4h, v21.4h + + frinti v0.16b, v31.16b + frinti v2.8h, v4.8h + frinti v1.8b, v9.8b + frinti v13.4h, v21.4h + + fcvtns v0.16b, v31.16b + fcvtns v2.8h, v4.8h + fcvtns v1.8b, v9.8b + fcvtns v13.4h, v21.4h + + fcvtnu v0.16b, v31.16b + fcvtnu v2.8h, v4.8h + fcvtnu v1.8b, v9.8b + fcvtnu v13.4h, v21.4h + + fcvtps v0.16b, v31.16b + fcvtps v2.8h, v4.8h + fcvtps v1.8b, v9.8b + fcvtps v13.4h, v21.4h + + fcvtpu v0.16b, v31.16b + fcvtpu v2.8h, v4.8h + fcvtpu v1.8b, v9.8b + fcvtpu v13.4h, v21.4h + + fcvtms v0.16b, v31.16b + fcvtms v2.8h, v4.8h + fcvtms v1.8b, v9.8b + fcvtms v13.4h, v21.4h + + fcvtmu v0.16b, v31.16b + fcvtmu v2.8h, v4.8h + fcvtmu v1.8b, v9.8b + fcvtmu v13.4h, v21.4h + + fcvtzs v0.16b, v31.16b + fcvtzs v2.8h, v4.8h + fcvtzs v1.8b, v9.8b + fcvtzs v13.4h, v21.4h + + fcvtzu v0.16b, v31.16b + fcvtzu v2.8h, v4.8h + fcvtzu v1.8b, v9.8b + fcvtzu v13.4h, v21.4h + + fcvtas v0.16b, v31.16b + fcvtas v2.8h, v4.8h + fcvtas v1.8b, v9.8b + fcvtas v13.4h, v21.4h + + fcvtau v0.16b, v31.16b + fcvtau v2.8h, v4.8h + fcvtau v1.8b, v9.8b + fcvtau v13.4h, v21.4h + + urecpe v0.16b, v31.16b + urecpe v2.8h, v4.8h + urecpe v1.8b, v9.8b + urecpe v13.4h, v21.4h + urecpe v1.2d, v9.2d + + ursqrte v0.16b, v31.16b + ursqrte v2.8h, v4.8h + ursqrte v1.8b, v9.8b + ursqrte v13.4h, v21.4h + ursqrte v1.2d, v9.2d + + scvtf v0.16b, v31.16b + scvtf v2.8h, v4.8h + scvtf v1.8b, v9.8b + scvtf v13.4h, v21.4h + + ucvtf v0.16b, v31.16b + ucvtf v2.8h, v4.8h + ucvtf v1.8b, v9.8b + ucvtf v13.4h, v21.4h + + frecpe v0.16b, v31.16b + frecpe v2.8h, v4.8h + frecpe v1.8b, v9.8b + frecpe v13.4h, v21.4h + + frsqrte v0.16b, v31.16b + frsqrte v2.8h, v4.8h + frsqrte v1.8b, v9.8b + frsqrte v13.4h, v21.4h + + fsqrt v0.16b, v31.16b + fsqrt v2.8h, v4.8h + fsqrt v1.8b, v9.8b + fsqrt v13.4h, v21.4h + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintn v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinta v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinta v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinta v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinta v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintp v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintm v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintx v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintz v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintz v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintz v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frintz v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frinti v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtns v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtnu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtps v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtpu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtms v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtmu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzs v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtzu v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtas v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fcvtau v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: urecpe v1.2d, v9.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ursqrte v1.2d, v9.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte v13.4h, v21.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v0.16b, v31.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v2.8h, v4.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v1.8b, v9.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fsqrt v13.4h, v21.4h +// CHECK-ERROR: ^ + //---------------------------------------------------------------------- // Scalar Floating-point Convert To Signed Fixed-point (Immediate) //----------------------------------------------------------------------