From 738a97a1dbf484904db6f467f93d3c3da67e9925 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Mon, 23 Nov 2009 21:57:23 +0000
Subject: [PATCH] Massive refactoring of NEON instructions. Separate opcode
 from data size specifier suffix, move \t up stream to instruction format,
 and fix more 80 column violations.

This fixes the NEON asm printing so the "predicate" field is printed between
the opcode and the data type suffix.

llvm-svn: 89706
---
 llvm/lib/Target/ARM/ARMInstrFormats.td |  102 +-
 llvm/lib/Target/ARM/ARMInstrNEON.td    | 1766 +++++++++++++-----------
 2 files changed, 1011 insertions(+), 857 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 9949cf1c3749..e76e93cf671c 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1217,30 +1217,45 @@ class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops,
 //
 class NeonI pattern>
+  : InstARM {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ops pred:$p));
+  let AsmString = !strconcat(
+    !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
+    !strconcat("\t", asm));
+  let Pattern = pattern;
+  list Predicates = [HasNEON];
+}
+
+// Same as NeonI except it does not have a "data type" specifier.
+class NeonXI pattern>
   : InstARM {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ops pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
   let Pattern = pattern;
   list Predicates = [HasNEON];
 }
 class NI pattern>
-  : NeonI {
 }
-class NI4 pattern>
-  : NeonI {
+class NI4 pattern>
+  : NeonXI {
 }
 class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, string cstr, list pattern>
-  : NeonI {
+            string opc, string dt, string asm, string cstr, list pattern>
+  : NeonI {
   let Inst{31-24} = 0b11110100;
   let Inst{23} = op23;
   let Inst{21-20} = op21_20;
@@ -1249,8 +1264,15 @@ class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4,
 }
 class NDataI pattern>
+  : NeonI {
+  let Inst{31-25} = 0b1111001;
+}
+
+class NDataXI pattern>
-  : NeonI {
   let Inst{31-25} = 0b1111001;
 }
@@ -1259,8 +1281,8 @@ class NDataI op21_19, bits<4> op11_8, bit op7, bit op6, bit op5, bit op4, dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, string cstr, list pattern>
-  : NDataI {
+            string opc, string dt, string asm, string cstr, list pattern>
+  : NDataI {
   let Inst{23} = op23;
   let Inst{21-19} = op21_19;
   let Inst{11-8} = op11_8;
@@ -1272,10 +1294,25 @@ class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6,
 // NEON 2 vector register format.
 class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
+          bits<5> op11_7, bit op6, bit op4,
+          dag oops, dag iops, InstrItinClass itin,
+          string opc, string dt, string asm, string cstr, list pattern>
+  : NDataI {
+  let Inst{24-23} = op24_23;
+  let Inst{21-20} = op21_20;
+  let Inst{19-18} = op19_18;
+  let Inst{17-16} = op17_16;
+  let Inst{11-7} = op11_7;
+  let Inst{6} = op6;
+  let Inst{4} = op4;
+}
+
+// Same as N2V except it doesn't have a datatype suffix.
+class N2VX op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, string cstr, list pattern>
-  : NDataI {
+  : NDataXI {
   let Inst{24-23} = op24_23;
   let Inst{21-20} = op21_20;
   let Inst{19-18} = op19_18;
@@ -1288,8 +1325,8 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
 // NEON 2 vector register with immediate.
 class N2VImm op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, string cstr, list pattern>
-  : NDataI {
+            string opc, string dt, string asm, string cstr, list pattern>
+  : NDataI {
   let Inst{24} = op24;
   let Inst{23} = op23;
   let Inst{11-8} = op11_8;
@@ -1300,9 +1337,22 @@ class N2VImm op11_8, bit op7, bit op6, bit op4,
 // NEON 3 vector register format.
 class N3V op21_20, bits<4> op11_8, bit op6, bit op4,
+          dag oops, dag iops, InstrItinClass itin,
+          string opc, string dt, string asm, string cstr, list pattern>
+  : NDataI {
+  let Inst{24} = op24;
+  let Inst{23} = op23;
+  let Inst{21-20} = op21_20;
+  let Inst{11-8} = op11_8;
+  let Inst{6} = op6;
+  let Inst{4} = op4;
+}
+
+// Same as N3VX except it doesn't have a data type suffix.
+class N3VX op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, string cstr, list pattern>
-  : NDataI {
+  : NDataXI {
   let Inst{24} = op24;
   let Inst{23} = op23;
   let Inst{21-20} = op21_20;
@@ -1314,29 +1364,37 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4,
 // NEON VMOVs between scalar and core registers.
 class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, Format f, InstrItinClass itin,
-              string opc, string asm, list pattern>
-  : AI {
+              string opc, string dt, string asm, list pattern>
+  : InstARM {
   let Inst{27-20} = opcod1;
   let Inst{11-8} = opcod2;
   let Inst{6-5} = opcod3;
   let Inst{4} = 1;
+
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ops pred:$p));
+  let AsmString = !strconcat(
+    !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
+    !strconcat("\t", asm));
+  let Pattern = pattern;
   list Predicates = [HasNEON];
 }
 class NVGetLane opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin,
-                string opc, string asm, list pattern>
+                string opc, string dt, string asm, list pattern>
   : NVLaneOp;
+             opc, dt, asm, pattern>;
 class NVSetLane opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin,
-                string opc, string asm, list pattern>
+                string opc, string dt, string asm, list pattern>
   : NVLaneOp;
+             opc, dt, asm, pattern>;
 class NVDup opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list pattern>
+            string opc, string dt, string asm, list pattern>
   : NVLaneOp;
+             opc, dt, asm, pattern>;

 // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
 // for single-precision FP.
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 2357669a7881..a4fe75236b6f 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -146,7 +146,7 @@ def VLDMS : NI<(outs),
 // Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, - "vldmia", "\t$addr, ${dst:dregpair}", + "vldmia", "$addr, ${dst:dregpair}", [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -158,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), // Use vstmia to store a Q register as a D register pair. def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, - "vstmia", "\t$addr, ${src:dregpair}", + "vstmia", "$addr, ${src:dregpair}", [(store (v2f64 QPR:$src), addrmode4:$addr)]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -168,217 +168,219 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), } // VLD1 : Vector Load (multiple single elements) -class VLD1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VLD1D op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, "\t\\{$dst\\}, $addr", "", + OpcodeStr, Dt, "\\{$dst\\}, $addr", "", [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -class VLD1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VLD1Q op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, "\t${dst:dregpair}, $addr", "", + OpcodeStr, Dt, "${dst:dregpair}, $addr", "", [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -def VLD1d8 : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>; -def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>; -def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>; -def VLD1df : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>; -def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>; +def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>; +def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>; +def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>; +def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>; +def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>; -def VLD1q8 : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>; -def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>; -def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>; -def VLD1qf : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>; -def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>; +def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>; +def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>; +def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; +def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; +def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D op7_4, string OpcodeStr> +class VLD2D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr", "", []>; -class VLD2Q op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$dst1,$dst2\\}, $addr", "", []>; +class VLD2Q op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0011,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", 
"", []>; -def VLD2d8 : VLD2D<0b0000, "vld2.8">; -def VLD2d16 : VLD2D<0b0100, "vld2.16">; -def VLD2d32 : VLD2D<0b1000, "vld2.32">; +def VLD2d8 : VLD2D<0b0000, "vld2", "8">; +def VLD2d16 : VLD2D<0b0100, "vld2", "16">; +def VLD2d32 : VLD2D<0b1000, "vld2", "32">; def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD1, - "vld1.64", "\t\\{$dst1,$dst2\\}, $addr", "", []>; + "vld1", "64", "\\{$dst1,$dst2\\}, $addr", "", []>; -def VLD2q8 : VLD2Q<0b0000, "vld2.8">; -def VLD2q16 : VLD2Q<0b0100, "vld2.16">; -def VLD2q32 : VLD2Q<0b1000, "vld2.32">; +def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; +def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; +def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; // VLD3 : Vector Load (multiple 3-element structures) -class VLD3D op7_4, string OpcodeStr> +class VLD3D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; -class VLD3WB op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; +class VLD3WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr", + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr", "$addr.addr = $wb", []>; -def VLD3d8 : VLD3D<0b0000, "vld3.8">; -def VLD3d16 : VLD3D<0b0100, "vld3.16">; -def VLD3d32 : VLD3D<0b1000, "vld3.32">; +def VLD3d8 : VLD3D<0b0000, "vld3", "8">; +def VLD3d16 : VLD3D<0b0100, "vld3", "16">; +def VLD3d32 : VLD3D<0b1000, "vld3", "32">; def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, - "vld1.64", "\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; + "vld1", "64", "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; // vld3 to double-spaced even registers. -def VLD3q8a : VLD3WB<0b0000, "vld3.8">; -def VLD3q16a : VLD3WB<0b0100, "vld3.16">; -def VLD3q32a : VLD3WB<0b1000, "vld3.32">; +def VLD3q8a : VLD3WB<0b0000, "vld3", "8">; +def VLD3q16a : VLD3WB<0b0100, "vld3", "16">; +def VLD3q32a : VLD3WB<0b1000, "vld3", "32">; // vld3 to double-spaced odd registers. 
-def VLD3q8b : VLD3WB<0b0000, "vld3.8">; -def VLD3q16b : VLD3WB<0b0100, "vld3.16">; -def VLD3q32b : VLD3WB<0b1000, "vld3.32">; +def VLD3q8b : VLD3WB<0b0000, "vld3", "8">; +def VLD3q16b : VLD3WB<0b0100, "vld3", "16">; +def VLD3q32b : VLD3WB<0b1000, "vld3", "32">; // VLD4 : Vector Load (multiple 4-element structures) -class VLD4D op7_4, string OpcodeStr> +class VLD4D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0000,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; -class VLD4WB op7_4, string OpcodeStr> +class VLD4WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0001,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "$addr.addr = $wb", []>; -def VLD4d8 : VLD4D<0b0000, "vld4.8">; -def VLD4d16 : VLD4D<0b0100, "vld4.16">; -def VLD4d32 : VLD4D<0b1000, "vld4.32">; +def VLD4d8 : VLD4D<0b0000, "vld4", "8">; +def VLD4d16 : VLD4D<0b0100, "vld4", "16">; +def VLD4d32 : VLD4D<0b1000, "vld4", "32">; def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD1, - "vld1.64", "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; + "vld1", "64", "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; // vld4 to double-spaced even registers. -def VLD4q8a : VLD4WB<0b0000, "vld4.8">; -def VLD4q16a : VLD4WB<0b0100, "vld4.16">; -def VLD4q32a : VLD4WB<0b1000, "vld4.32">; +def VLD4q8a : VLD4WB<0b0000, "vld4", "8">; +def VLD4q16a : VLD4WB<0b0100, "vld4", "16">; +def VLD4q32a : VLD4WB<0b1000, "vld4", "32">; // vld4 to double-spaced odd registers. -def VLD4q8b : VLD4WB<0b0000, "vld4.8">; -def VLD4q16b : VLD4WB<0b0100, "vld4.16">; -def VLD4q32b : VLD4WB<0b1000, "vld4.32">; +def VLD4q8b : VLD4WB<0b0000, "vld4", "8">; +def VLD4q16b : VLD4WB<0b0100, "vld4", "16">; +def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN op11_8, string OpcodeStr> +class VLD2LN op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, - OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2, + OpcodeStr, Dt, "\\{$dst1[$lane],$dst2[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2", []>; // vld2 to single-spaced registers. -def VLD2LNd8 : VLD2LN<0b0001, "vld2.8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2.16"> { +def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; +def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } -def VLD2LNd32 : VLD2LN<0b1001, "vld2.32"> { +def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } // vld2 to double-spaced even registers. -def VLD2LNq16a: VLD2LN<0b0101, "vld2.16"> { +def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32a: VLD2LN<0b1001, "vld2.32"> { +def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // vld2 to double-spaced odd registers. 
-def VLD2LNq16b: VLD2LN<0b0101, "vld2.16"> { +def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32b: VLD2LN<0b1001, "vld2.32"> { +def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN op11_8, string OpcodeStr> +class VLD3LN op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, - OpcodeStr, - "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3, + OpcodeStr, Dt, + "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; // vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3.8"> { +def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } -def VLD3LNd16 : VLD3LN<0b0110, "vld3.16"> { +def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } -def VLD3LNd32 : VLD3LN<0b1010, "vld3.32"> { +def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } // vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3.16"> { +def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32a: VLD3LN<0b1010, "vld3.32"> { +def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // vld3 to double-spaced odd registers. -def VLD3LNq16b: VLD3LN<0b0110, "vld3.16"> { +def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32b: VLD3LN<0b1010, "vld3.32"> { +def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN op11_8, string OpcodeStr> +class VLD4LN op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, - OpcodeStr, - "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4, + OpcodeStr, Dt, + "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; // vld4 to single-spaced registers. -def VLD4LNd8 : VLD4LN<0b0011, "vld4.8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4.16"> { +def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; +def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } -def VLD4LNd32 : VLD4LN<0b1011, "vld4.32"> { +def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } // vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4.16"> { +def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32a: VLD4LN<0b1011, "vld4.32"> { +def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } // vld4 to double-spaced odd registers. 
-def VLD4LNq16b: VLD4LN<0b0111, "vld4.16"> { +def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32b: VLD4LN<0b1011, "vld4.32"> { +def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } @@ -390,217 +392,219 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4.32"> { } // mayLoad = 1, hasExtraDefRegAllocReq = 1 // VST1 : Vector Store (multiple single elements) -class VST1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VST1D op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - OpcodeStr, "\t\\{$src\\}, $addr", "", + OpcodeStr, Dt, "\\{$src\\}, $addr", "", [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VST1Q op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - OpcodeStr, "\t${src:dregpair}, $addr", "", + OpcodeStr, Dt, "${src:dregpair}, $addr", "", [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>; -def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>; -def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>; -def VST1df : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>; -def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>; +def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>; +def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>; +def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>; +def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>; +def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>; -def VST1q8 : VST1Q<0b0000, "vst1.8", v16i8, int_arm_neon_vst1>; -def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>; -def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>; -def VST1qf : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>; -def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>; +def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>; +def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>; +def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>; +def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; +def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; } // hasExtraSrcRegAllocReq let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { // VST2 : Vector Store (multiple 2-element structures) -class VST2D op7_4, string OpcodeStr> +class VST2D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b1000,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, "\t\\{$src1,$src2\\}, $addr", "", []>; -class VST2Q op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$src1,$src2\\}, $addr", "", []>; +class VST2Q op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0011,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr", + OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; -def VST2d8 : VST2D<0b0000, "vst2.8">; -def VST2d16 : VST2D<0b0100, "vst2.16">; -def VST2d32 : VST2D<0b1000, "vst2.32">; +def VST2d8 : VST2D<0b0000, "vst2", "8">; +def VST2d16 : VST2D<0b0100, "vst2", "16">; +def VST2d32 : VST2D<0b1000, "vst2", "32">; def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins 
addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - "vst1.64", "\t\\{$src1,$src2\\}, $addr", "", []>; + "vst1", "64", "\\{$src1,$src2\\}, $addr", "", []>; -def VST2q8 : VST2Q<0b0000, "vst2.8">; -def VST2q16 : VST2Q<0b0100, "vst2.16">; -def VST2q32 : VST2Q<0b1000, "vst2.32">; +def VST2q8 : VST2Q<0b0000, "vst2", "8">; +def VST2q16 : VST2Q<0b0100, "vst2", "16">; +def VST2q32 : VST2Q<0b1000, "vst2", "32">; // VST3 : Vector Store (multiple 3-element structures) -class VST3D op7_4, string OpcodeStr> +class VST3D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0100,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr", "", []>; -class VST3WB op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr", "", []>; +class VST3WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr", + OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr", "$addr.addr = $wb", []>; -def VST3d8 : VST3D<0b0000, "vst3.8">; -def VST3d16 : VST3D<0b0100, "vst3.16">; -def VST3d32 : VST3D<0b1000, "vst3.32">; +def VST3d8 : VST3D<0b0000, "vst3", "8">; +def VST3d16 : VST3D<0b0100, "vst3", "16">; +def VST3d32 : VST3D<0b1000, "vst3", "32">; def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - "vst1.64", "\t\\{$src1,$src2,$src3\\}, $addr", "", []>; + "vst1", "64", "\\{$src1,$src2,$src3\\}, $addr", "", []>; // vst3 to double-spaced even registers. -def VST3q8a : VST3WB<0b0000, "vst3.8">; -def VST3q16a : VST3WB<0b0100, "vst3.16">; -def VST3q32a : VST3WB<0b1000, "vst3.32">; +def VST3q8a : VST3WB<0b0000, "vst3", "8">; +def VST3q16a : VST3WB<0b0100, "vst3", "16">; +def VST3q32a : VST3WB<0b1000, "vst3", "32">; // vst3 to double-spaced odd registers. -def VST3q8b : VST3WB<0b0000, "vst3.8">; -def VST3q16b : VST3WB<0b0100, "vst3.16">; -def VST3q32b : VST3WB<0b1000, "vst3.32">; +def VST3q8b : VST3WB<0b0000, "vst3", "8">; +def VST3q16b : VST3WB<0b0100, "vst3", "16">; +def VST3q32b : VST3WB<0b1000, "vst3", "32">; // VST4 : Vector Store (multiple 4-element structures) -class VST4D op7_4, string OpcodeStr> +class VST4D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0000,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr", + OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; -class VST4WB op7_4, string OpcodeStr> +class VST4WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr", + OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr", "$addr.addr = $wb", []>; -def VST4d8 : VST4D<0b0000, "vst4.8">; -def VST4d16 : VST4D<0b0100, "vst4.16">; -def VST4d32 : VST4D<0b1000, "vst4.32">; +def VST4d8 : VST4D<0b0000, "vst4", "8">; +def VST4d16 : VST4D<0b0100, "vst4", "16">; +def VST4d32 : VST4D<0b1000, "vst4", "32">; def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - "vst1.64", "\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; + "vst1", "64", "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; // vst4 to double-spaced even registers. 
-def VST4q8a : VST4WB<0b0000, "vst4.8">; -def VST4q16a : VST4WB<0b0100, "vst4.16">; -def VST4q32a : VST4WB<0b1000, "vst4.32">; +def VST4q8a : VST4WB<0b0000, "vst4", "8">; +def VST4q16a : VST4WB<0b0100, "vst4", "16">; +def VST4q32a : VST4WB<0b1000, "vst4", "32">; // vst4 to double-spaced odd registers. -def VST4q8b : VST4WB<0b0000, "vst4.8">; -def VST4q16b : VST4WB<0b0100, "vst4.16">; -def VST4q32b : VST4WB<0b1000, "vst4.32">; +def VST4q8b : VST4WB<0b0000, "vst4", "8">; +def VST4q16b : VST4WB<0b0100, "vst4", "16">; +def VST4q32b : VST4WB<0b1000, "vst4", "32">; // VST1LN : Vector Store (single element from one lane) // FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN op11_8, string OpcodeStr> +class VST2LN op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, - OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr", - "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VST, + OpcodeStr, Dt, "\\{$src1[$lane],$src2[$lane]\\}, $addr", + "", []>; // vst2 to single-spaced registers. -def VST2LNd8 : VST2LN<0b0001, "vst2.8">; -def VST2LNd16 : VST2LN<0b0101, "vst2.16"> { +def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; +def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } -def VST2LNd32 : VST2LN<0b1001, "vst2.32"> { +def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } // vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2.16"> { +def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32a: VST2LN<0b1001, "vst2.32"> { +def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0101, "vst2.16"> { +def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32b: VST2LN<0b1001, "vst2.32"> { +def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LN op11_8, string OpcodeStr> +class VST3LN op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, - OpcodeStr, - "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr", "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST, + OpcodeStr, Dt, + "\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr", "", []>; // vst3 to single-spaced registers. -def VST3LNd8 : VST3LN<0b0010, "vst3.8"> { +def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } -def VST3LNd16 : VST3LN<0b0110, "vst3.16"> { +def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } -def VST3LNd32 : VST3LN<0b1010, "vst3.32"> { +def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } // vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3.16"> { +def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32a: VST3LN<0b1010, "vst3.32"> { +def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // vst3 to double-spaced odd registers. 
-def VST3LNq16b: VST3LN<0b0110, "vst3.16"> { +def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32b: VST3LN<0b1010, "vst3.32"> { +def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN op11_8, string OpcodeStr> +class VST4LN op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, - OpcodeStr, - "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr", - "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST, + OpcodeStr, Dt, + "\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr", + "", []>; // vst4 to single-spaced registers. -def VST4LNd8 : VST4LN<0b0011, "vst4.8">; -def VST4LNd16 : VST4LN<0b0111, "vst4.16"> { +def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; +def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } -def VST4LNd32 : VST4LN<0b1011, "vst4.32"> { +def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } // vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4.16"> { +def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32a: VST4LN<0b1011, "vst4.32"> { +def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } // vst4 to double-spaced odd registers. -def VST4LNq16b: VST4LN<0b0111, "vst4.16"> { +def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32b: VST4LN<0b1011, "vst4.32"> { +def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } @@ -652,25 +656,25 @@ def SubReg_i32_lane : SDNodeXForm op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; // Basic 2-register operations, scalar single-precision. class N2VDs op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; + IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; class N2VDsPat : NEONFPPat<(ResTy (OpNode SPR:$a)), @@ -681,27 +685,27 @@ class N2VDsPat // Basic 2-register intrinsics, both double- and quad-register. 
class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Basic 2-register intrinsics, scalar single-precision class N2VDInts op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; + OpcodeStr, Dt, "$dst, $src", "", []>; class N2VDIntsPat : NEONFPPat<(f32 (OpNode SPR:$a)), @@ -712,49 +716,62 @@ class N2VDIntsPat // Narrow 2-register intrinsics. class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp> : N2V; // Long 2-register intrinsics (currently only used for VMOVL). class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N2V; // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. -class N2VDShuffle op19_18, bits<5> op11_7, string OpcodeStr> +class N2VDShuffle op19_18, bits<5> op11_7, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2), (ins DPR:$src1, DPR:$src2), IIC_VPERMD, - OpcodeStr, "\t$dst1, $dst2", + OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; class N2VQShuffle op19_18, bits<5> op11_7, - InstrItinClass itin, string OpcodeStr> + InstrItinClass itin, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2), (ins QPR:$src1, QPR:$src2), itin, - OpcodeStr, "\t$dst1, $dst2", + OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; // Basic 3-register operations, both double- and quad-register. class N3VD op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + let isCommutable = Commutable; +} +// Same as N3VD but no data type. 
+class N3VDX op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3VX { let isCommutable = Commutable; } class N3VDSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -762,11 +779,11 @@ class N3VDSL op21_20, bits<4> op11_8, let isCommutable = 0; } class N3VDSL16 op21_20, bits<4> op11_8, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), IIC_VMULi16D, - OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), @@ -775,20 +792,31 @@ class N3VDSL16 op21_20, bits<4> op11_8, } class N3VQ op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + let isCommutable = Commutable; +} +class N3VQX op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3VX { let isCommutable = Commutable; } class N3VQSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -796,11 +824,12 @@ class N3VQSL op21_20, bits<4> op11_8, let isCommutable = 0; } class N3VQSL16 op21_20, bits<4> op11_8, - string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), IIC_VMULi16Q, - OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -810,11 +839,11 @@ class N3VQSL16 op21_20, bits<4> op11_8, // Basic 3-register operations, scalar single-precision class N3VDs op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { let isCommutable = Commutable; } class N3VDsPat @@ -826,19 +855,20 @@ class N3VDsPat // Basic 3-register intrinsics, both double- and quad-register. 
class N3VDInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -846,10 +876,10 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), @@ -858,19 +888,21 @@ class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -878,10 +910,11 @@ class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, OpcodeStr, "\t$dst, $src1, $src2[$lane]", "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -891,30 +924,32 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, // Multiply-Add/Sub operations, both double- and quad-register. 
class N3VDMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, - OpcodeStr, "\t$dst, $src2, $src3[$lane]", "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, (Ty (NEONvduplane (Ty DPR_VFP2:$src3), imm:$lane)))))))]>; class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, - OpcodeStr, "\t$dst, $src2, $src3[$lane]", "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, @@ -922,32 +957,33 @@ class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))))]>; class N3VQMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType Ty, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; class N3VQMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, - OpcodeStr, "\t$dst, $src2, $src3[$lane]", "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))))]>; class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, - OpcodeStr, "\t$dst, $src2, $src3[$lane]", "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, @@ -956,12 +992,12 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, // Multiply-Add/Sub operations, scalar single-precision class N3VDMulOps op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; class N3VDMulOpsPat : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), @@ -974,50 +1010,51 @@ class N3VDMulOpsPat // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. 
class N3VDInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; class N3VQInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. class N3VLInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N3V; class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; class N3VLInt3SL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, InstrItinClass iti // Narrowing 3-register intrinsics. class N3VNInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyD, ValueType TyQ, + string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } // Long 3-register intrinsics. class N3VLInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD, - Intrinsic IntOp, bit Commutable> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; class N3VLIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, InstrItinClass itin // Wide 3-register intrinsics. class N3VWInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyQ, ValueType TyD, + string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } // Pairwise long 2-register intrinsics, both double- and quad-register. class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Pairwise long 2-register accumulate intrinsics, // both double- and quad-register. // The destination register is also used as the first source operand register. 
class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Shift by immediate, // both double- and quad-register. class N2VDSh op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode OpNode> : N2VImm; class N2VQSh op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode OpNode> : N2VImm; // Long shift by immediate. class N2VLSh op11_8, bit op7, bit op6, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; // Narrow shift by immediate. class N2VNSh op11_8, bit op7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; // Shift right by immediate and accumulate, // both double- and quad-register. class N2VDShAdd op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; class N2VQShAdd op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; // Shift by immediate and insert, // both double- and quad-register. class N2VDShIns op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; class N2VQShIns op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; // Convert, with fractional bits immediate, // both double- and quad-register. class N2VCvtD op11_8, bit op7, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; class N2VCvtQ op11_8, bit op7, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; //===----------------------------------------------------------------------===// @@ -1208,44 +1253,55 @@ class N2VCvtQ op11_8, bit op7, bit op4, multiclass N3V_QHS op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, SDNode OpNode, bit Commutable = 0> { + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> { // 64-bit vector types. def v8i8 : N3VD; + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, OpNode, Commutable>; def v4i16 : N3VD; + OpcodeStr, !strconcat(Dt, "16"), + v4i16, v4i16, OpNode, Commutable>; def v2i32 : N3VD; + OpcodeStr, !strconcat(Dt, "32"), + v2i32, v2i32, OpNode, Commutable>; // 128-bit vector types. 
def v16i8 : N3VQ; + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, OpNode, Commutable>; def v8i16 : N3VQ; + OpcodeStr, !strconcat(Dt, "16"), + v8i16, v8i16, OpNode, Commutable>; def v4i32 : N3VQ; + OpcodeStr, !strconcat(Dt, "32"), + v4i32, v4i32, OpNode, Commutable>; } -multiclass N3VSL_HS op11_8, string OpcodeStr, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), +multiclass N3VSL_HS op11_8, string OpcodeStr, string Dt, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), + v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"), v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"), v4i32, v2i32, ShOp>; } // ....then also with element size 64 bits: multiclass N3V_QHSD op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, SDNode OpNode, bit Commutable = 0> + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> : N3V_QHS { + OpcodeStr, Dt, OpNode, Commutable> { def v1i64 : N3VD; + OpcodeStr, !strconcat(Dt, "64"), + v1i64, v1i64, OpNode, Commutable>; def v2i64 : N3VQ; + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, OpNode, Commutable>; } @@ -1253,27 +1309,30 @@ multiclass N3V_QHSD op11_8, bit op4, // source operand element sizes of 16, 32 and 64 bits: multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, Intrinsic IntOp> { def v8i8 : N2VNInt; + itin, OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, IntOp>; def v4i16 : N2VNInt; + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, IntOp>; def v2i32 : N2VNInt; + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, IntOp>; } // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: multiclass N2VLInt_QHS op24_23, bits<5> op11_7, bit op6, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { def v8i16 : N2VLInt; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; def v4i32 : N2VLInt; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; def v2i64 : N2VLInt; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } @@ -1283,74 +1342,85 @@ multiclass N2VLInt_QHS op24_23, bits<5> op11_7, bit op6, bit op4, multiclass N3VInt_HS op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { + string OpcodeStr, string Dt, + Intrinsic IntOp, bit Commutable = 0> { // 64-bit vector types. def v4i16 : N3VDInt; def v2i32 : N3VDInt; // 128-bit vector types. 
def v8i16 : N3VQInt; def v4i32 : N3VQInt; } multiclass N3VIntSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, - !strconcat(OpcodeStr, "16"), v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, - !strconcat(OpcodeStr, "32"), v2i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, - !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, - !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VInt_QHS op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> + string OpcodeStr, string Dt, + Intrinsic IntOp, bit Commutable = 0> : N3VInt_HS { + OpcodeStr, Dt, IntOp, Commutable> { def v8i8 : N3VDInt; + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, IntOp, Commutable>; def v16i8 : N3VQInt; + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, IntOp, Commutable>; } // ....then also with element size of 64 bits: multiclass N3VInt_QHSD op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> + string OpcodeStr, string Dt, + Intrinsic IntOp, bit Commutable = 0> : N3VInt_QHS { + OpcodeStr, Dt, IntOp, Commutable> { def v1i64 : N3VDInt; + OpcodeStr, !strconcat(Dt, "64"), + v1i64, v1i64, IntOp, Commutable>; def v2i64 : N3VQInt; + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, IntOp, Commutable>; } // Neon Narrowing 3-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N3VNInt_HSD op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { - def v8i8 : N3VNInt { + def v8i8 : N3VNInt; - def v4i16 : N3VNInt; - def v2i32 : N3VNInt; } @@ -1359,41 +1429,50 @@ multiclass N3VNInt_HSD op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { def v4i32 : N3VLInt; + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, IntOp, Commutable>; def v2i64 : N3VLInt; + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, IntOp, Commutable>; } multiclass N3VLIntSL_HS op11_8, - InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> { + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp> { def v4i16 : N3VLIntSL16; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; def v2i32 : N3VLIntSL; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VLInt_QHS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS { + : N3VLInt_HS { def v8i16 : N3VLInt; + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, IntOp, Commutable>; } // Neon Wide 3-register vector intrinsics, // source operand element sizes of 8, 16 and 32 bits: multiclass N3VWInt_QHS op11_8, bit op4, - string OpcodeStr, Intrinsic 
IntOp, bit Commutable = 0> { - def v8i16 : N3VWInt { + def v8i16 : N3VWInt; - def v4i32 : N3VWInt; - def v2i64 : N3VWInt; } @@ -1403,57 +1482,57 @@ multiclass N3VWInt_QHS op11_8, bit op4, multiclass N3VMulOp_QHS op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, SDNode OpNode> { + string OpcodeStr, string Dt, SDNode OpNode> { // 64-bit vector types. def v8i8 : N3VDMulOp; + OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>; def v4i16 : N3VDMulOp; + OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>; def v2i32 : N3VDMulOp; + OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>; // 128-bit vector types. def v16i8 : N3VQMulOp; + OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>; def v8i16 : N3VQMulOp; + OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>; def v4i32 : N3VQMulOp; + OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>; } multiclass N3VMulOpSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, SDNode ShOp> { + string OpcodeStr, string Dt, SDNode ShOp> { def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, - !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, - !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, - !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>; def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, - !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>; } // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N3VDInt3; + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N3VDInt3; + OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; def v2i32 : N3VDInt3; + OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. 
def v16i8 : N3VQInt3; + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; def v8i16 : N3VQInt3; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; def v4i32 : N3VQInt3; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1461,27 +1540,27 @@ multiclass N3VInt3_QHS op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { def v4i32 : N3VLInt3; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; def v2i64 : N3VLInt3; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } multiclass N3VLInt3SL_HS op11_8, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { def v4i16 : N3VLInt3SL16; + OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>; def v2i32 : N3VLInt3SL; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp> - : N3VLInt3_HS { + string OpcodeStr, string Dt, Intrinsic IntOp> + : N3VLInt3_HS { def v8i16 : N3VLInt3; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } @@ -1490,22 +1569,22 @@ multiclass N3VLInt3_QHS op11_8, bit op4, multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N2VDInt; + itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N2VDInt; + itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; def v2i32 : N2VDInt; + itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. def v16i8 : N2VQInt; + itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; def v8i16 : N2VQInt; + itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; def v4i32 : N2VQInt; + itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1513,22 +1592,22 @@ multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt; + OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; def v4i16 : N2VDPLInt; + OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; def v2i32 : N2VDPLInt; + OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; // 128-bit vector types. def v16i8 : N2VQPLInt; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; def v8i16 : N2VQPLInt; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; def v4i32 : N2VQPLInt; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; } @@ -1536,61 +1615,62 @@ multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt2; + OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; def v4i16 : N2VDPLInt2; + OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; def v2i32 : N2VDPLInt2; + OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; // 128-bit vector types. 
def v16i8 : N2VQPLInt2; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; def v8i16 : N2VQPLInt2; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; def v4i32 : N2VQPLInt2; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; } // Neon 2-register vector shift by immediate, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VSh_QHSD op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, SDNode OpNode> { + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { // 64-bit vector types. def v8i8 : N2VDSh { + OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDSh { + OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDSh { + OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDSh; + OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; // imm6 = xxxxxx // 128-bit vector types. def v16i8 : N2VQSh { + OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQSh { + OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQSh { + OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQSh; + OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx } @@ -1598,39 +1678,39 @@ multiclass N2VSh_QHSD op11_8, bit op4, // Neon Shift-Accumulate vector operations, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VShAdd_QHSD op11_8, bit op4, - string OpcodeStr, SDNode ShOp> { + string OpcodeStr, string Dt, SDNode ShOp> { // 64-bit vector types. def v8i8 : N2VDShAdd { + OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDShAdd { + OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDShAdd { + OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDShAdd; + OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. def v16i8 : N2VQShAdd { + OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQShAdd { + OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQShAdd { + OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQShAdd; + OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>; // imm6 = xxxxxx } @@ -1641,53 +1721,53 @@ multiclass N2VShIns_QHSD op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. def v8i8 : N2VDShIns { + OpcodeStr, "8", v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDShIns { + OpcodeStr, "16", v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDShIns { + OpcodeStr, "32", v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDShIns; + OpcodeStr, "64", v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. 
def v16i8 : N2VQShIns { + OpcodeStr, "8", v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQShIns { + OpcodeStr, "16", v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQShIns { + OpcodeStr, "32", v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQShIns; + OpcodeStr, "64", v2i64, ShOp>; // imm6 = xxxxxx } // Neon Shift Long operations, // element sizes of 8, 16, 32 bits: multiclass N2VLSh_QHS op11_8, bit op7, bit op6, - bit op4, string OpcodeStr, SDNode OpNode> { + bit op4, string OpcodeStr, string Dt, SDNode OpNode> { def v8i16 : N2VLSh { + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i32 : N2VLSh { + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i64 : N2VLSh { + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -1695,18 +1775,18 @@ multiclass N2VLSh_QHS op11_8, bit op7, bit op6, // Neon Shift Narrow operations, // element sizes of 16, 32, 64 bits: multiclass N2VNSh_HSD op11_8, bit op7, bit op6, - bit op4, InstrItinClass itin, string OpcodeStr, + bit op4, InstrItinClass itin, string OpcodeStr, string Dt, SDNode OpNode> { def v8i8 : N2VNSh { + OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VNSh { + OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VNSh { + OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -1718,56 +1798,58 @@ multiclass N2VNSh_HSD op11_8, bit op7, bit op6, // Vector Add Operations. // VADD : Vector Add (integer and floating-point) -defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", +defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", add, 1>; -def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", +def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", v2f32, v2f32, fadd, 1>; -def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", +def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; -defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; +defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; +defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>; + IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>; defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>; + IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>; + IIC_VBINi4Q, 
"vrhadd", "s", int_arm_neon_vrhadds, 1>; defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>; + IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>; + IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>; defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>; + IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", + int_arm_neon_vaddhn, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) -defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; +defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", + int_arm_neon_vraddhn, 1>; // Vector Multiply Operations. // VMUL : Vector Multiply (integer, polynomial and floating-point) defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, "vmul.i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", + IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; -def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", +def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", v2f32, v2f32, fmul, 1>; -def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", +def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), @@ -1790,10 +1872,10 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqdmulh.s", int_arm_neon_vqdmulh, 1>; + "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqdmulh.s", int_arm_neon_vqdmulh>; + "vqdmulh", "s", int_arm_neon_vqdmulh>; def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), @@ -1812,10 +1894,10 @@ def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; + "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; defm VQRDMULHsl 
: N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqrdmulh.s", int_arm_neon_vqrdmulh>; + "vqrdmulh", "s", int_arm_neon_vqrdmulh>; def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), @@ -1832,37 +1914,37 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u", int_arm_neon_vmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", +def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; -defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", +defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", int_arm_neon_vmulls>; -defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", +defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", int_arm_neon_vmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s", int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. // VMLA : Vector Multiply Accumulate (integer and floating-point) defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; -def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul, fadd>; -def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", +def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul, fadd>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; -def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", v2f32, fmul, fadd>; -def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", +def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", v4f32, v2f32, fmul, fadd>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), @@ -1893,28 +1975,29 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>; -defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>; -defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>; +defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", 
int_arm_neon_vmlalu>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>; +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s", + int_arm_neon_vqdmlal>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; -def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul, fsub>; -def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", +def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul, fsub>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; -def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", v2f32, fmul, fsub>; -def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", +def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", v4f32, v2f32, fmul, fsub>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), @@ -1945,167 +2028,170 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>; -defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>; -defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>; +defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl.s", int_arm_neon_vqdmlsl>; +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s", + int_arm_neon_vqdmlsl>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. 
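A note on the mechanics: the common thread in the multiclass changes earlier in this patch is that the element-size digits are appended to a separate data-type prefix (Dt) instead of being baked into the opcode string, and the wider multiclasses pick up the smaller sizes by inheritance. Below is a minimal standalone sketch of that expansion; the names (InstSketch, HS_Sketch, QHS_Sketch, VQADDsSketch) are made up for illustration and are not the in-tree classes, though the snippet should parse with a plain llvm-tblgen run.

// Sketch only: stand-ins for the real NEON format classes, with made-up names.
class InstSketch<string opc, string dt> {
  string Opcode   = opc;   // e.g. "vqadd"
  string DataType = dt;    // e.g. "s16"
}
multiclass HS_Sketch<string OpcodeStr, string Dt> {
  def v4i16 : InstSketch<OpcodeStr, !strconcat(Dt, "16")>;
  def v2i32 : InstSketch<OpcodeStr, !strconcat(Dt, "32")>;
}
// The "....then also with element size of 8 bits" multiclasses work by
// inheritance, passing Dt straight through to the base multiclass.
multiclass QHS_Sketch<string OpcodeStr, string Dt>
  : HS_Sketch<OpcodeStr, Dt> {
  def v8i8 : InstSketch<OpcodeStr, !strconcat(Dt, "8")>;
}
defm VQADDsSketch : QHS_Sketch<"vqadd", "s">; // yields s8, s16 and s32 records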
// VSUB : Vector Subtract (integer and floating-point) defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, - "vsub.i", sub, 0>; -def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", + "vsub", "i", sub, 0>; +def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", v2f32, v2f32, fsub, 0>; -def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", +def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) -defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>; -defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>; +defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; +defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vhsub.s", int_arm_neon_vhsubs, 0>; + "vhsub", "s", int_arm_neon_vhsubs, 0>; defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vhsub.u", int_arm_neon_vhsubu, 0>; + "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vqsub.s", int_arm_neon_vqsubs, 0>; + "vqsub", "s", int_arm_neon_vqsubs, 0>; defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vqsub.u", int_arm_neon_vqsubu, 0>; + "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", + int_arm_neon_vsubhn, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) -defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>; +defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", + int_arm_neon_vrsubhn, 0>; // Vector Comparisons. 
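At the instruction definitions above, the visible change is mechanical: "vsub.i" becomes "vsub", "i", "vqsub.s" becomes "vqsub", "s", and so on. What that buys is the predicate landing between the mnemonic and the data-type suffix when the string is reassembled. A hedged sketch of the reassembly follows; AsmSplitSketch and VSUBsketch are illustrative names, not the real format classes.

// Sketch: the predicate substitution point now comes ahead of ".<dt>".
class AsmSplitSketch<string opc, string dt, string operands> {
  string Mnemonic  = !strconcat(opc, "${p}");   // "vsub${p}"
  string Suffix    = !strconcat(".", dt);       // ".i32"
  string AsmString = !strconcat(Mnemonic,
                     !strconcat(Suffix, !strconcat("\t", operands)));
}
def VSUBsketch : AsmSplitSketch<"vsub", "i32", "$dst, $src1, $src2">;
// A predicated VSUB should now print as "vsubeq.i32 ..." instead of the old
// "vsub.i32eq ..." that resulted from fusing the suffix into the opcode string.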
// VCEQ : Vector Compare Equal defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vceq.i", NEONvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32", v2i32, v2f32, + IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>; +def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32", v4i32, v4f32, +def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge.s", NEONvcge, 0>; + IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge.u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32", + IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; +def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32", v4i32, v4f32, +def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt.s", NEONvcgt, 0>; + IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt.u", NEONvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, + IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>; +def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, +def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; -def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; -def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vtst.i", NEONvtst, 1>; + IIC_VBINi4Q, "vtst", "i", NEONvtst, 1>; // Vector Bitwise Operations. 
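The bitwise operations in the next hunk (vand, veor, vorr, vbic, vorn, vmvn, vbsl) take no data-type suffix at all, which is why they move to the X-suffixed formats (N3VDX, N3VQX, N3VX, N2VX) and why the leading "\t" disappears from their operand strings; the format class now supplies it. A minimal sketch of the suffix-less assembly-string layout, with an illustrative class name:

// Sketch: "X" forms build "opc${p}\toperands" with no ".<dt>" component.
class AsmNoDtSketch<string opc, string operands> {
  string Mnemonic  = !strconcat(opc, "${p}");
  string AsmString = !strconcat(Mnemonic, !strconcat("\t", operands));
}
def VANDsketch : AsmNoDtSketch<"vand", "$dst, $src1, $src2">;
// AsmString is "vand${p}\t$dst, $src1, $src2"; the printed form of these
// suffix-less instructions should be unchanged, only the "\t" moves out of
// each definition and into the shared format class.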
// VAND : Vector Bitwise AND -def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", - v2i32, v2i32, and, 1>; -def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", - v4i32, v4i32, and, 1>; +def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", + v2i32, v2i32, and, 1>; +def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", + v4i32, v4i32, and, 1>; // VEOR : Vector Bitwise Exclusive OR -def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", - v2i32, v2i32, xor, 1>; -def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", - v4i32, v4i32, xor, 1>; +def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", + v2i32, v2i32, xor, 1>; +def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", + v4i32, v4i32, xor, 1>; // VORR : Vector Bitwise OR -def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", - v2i32, v2i32, or, 1>; -def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", - v4i32, v4i32, or, 1>; +def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", + v2i32, v2i32, or, 1>; +def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", + v4i32, v4i32, or, 1>; // VBIC : Vector Bitwise Bit Clear (AND NOT) -def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), +def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vbic", "\t$dst, $src1, $src2", "", + "vbic", "$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>; -def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), +def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vbic", "\t$dst, $src1, $src2", "", + "vbic", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>; // VORN : Vector Bitwise OR NOT -def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), +def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vorn", "\t$dst, $src1, $src2", "", + "vorn", "$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>; -def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), +def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vorn", "\t$dst, $src1, $src2", "", + "vorn", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>; // VMVN : Vector Bitwise NOT -def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, +def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, - "vmvn", "\t$dst, $src", "", + "vmvn", "$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; -def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, +def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, - "vmvn", "\t$dst, $src", "", + "vmvn", "$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select -def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD, - "vbsl", "\t$dst, $src2, $src3", "$src1 = $dst", + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (v2i32 (or (and DPR:$src2, DPR:$src1), (and DPR:$src3, (vnot_conv DPR:$src1)))))]>; -def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), +def VBSLq : 
N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ, - "vbsl", "\t$dst, $src2, $src3", "$src1 = $dst", + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (v4i32 (or (and QPR:$src2, QPR:$src1), (and QPR:$src3, (vnot_conv QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False -// like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst", +// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", // VBIT : Vector Bitwise Insert if True -// like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst", +// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", // These are not yet implemented. The TwoAddress pass will not go looking // for equivalent operations with different register constraints; it just // inserts copies. @@ -2115,261 +2201,268 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vabd.s", int_arm_neon_vabds, 0>; + "vabd", "s", int_arm_neon_vabds, 0>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vabd.u", int_arm_neon_vabdu, 0>; + "vabd", "u", int_arm_neon_vabdu, 0>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, - "vabd.f32", v2f32, v2f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, - "vabd.f32", v4f32, v4f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, - "vabdl.s", int_arm_neon_vabdls, 0>; + "vabdl", "s", int_arm_neon_vabdls, 0>; defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, - "vabdl.u", int_arm_neon_vabdlu, 0>; + "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>; +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. 
// VMAX : Vector Maximum defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>; + IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32, - int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32, - int_arm_neon_vmaxs, 1>; + IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32", + v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32", + v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>; + IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>; defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32, - int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32, - int_arm_neon_vmins, 1>; + IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32", + v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32", + v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. // VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8, - int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16, - int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32, - int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32, - int_arm_neon_vpadd, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long -defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s", +defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", int_arm_neon_vpaddls>; -defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u", +defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", int_arm_neon_vpaddlu>; // VPADAL : Vector Pairwise Add and Accumulate Long -defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s", +defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", int_arm_neon_vpadals>; -defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u", +defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8, - int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 
0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16, - int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32, - int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8, - int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16, - int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32, - int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32, - int_arm_neon_vpmaxs, 0>; +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8", + v8i8, v8i8, int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16", + v4i16, v4i16, int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32", + v2i32, v2i32, int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8", + v8i8, v8i8, int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16", + v4i16, v4i16, int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32", + v2i32, v2i32, int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32", + v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8, - int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16, - int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32, - int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8, - int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16, - int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32, - int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32, - int_arm_neon_vpmins, 0>; +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8", + v8i8, v8i8, int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16", + v4i16, v4i16, int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32", + v2i32, v2i32, int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8", + v8i8, v8i8, int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16", + v4i16, v4i16, int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32", + v2i32, v2i32, int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32", + v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
// VRECPE : Vector Reciprocal Estimate def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAD, "vrecpe.u32", + IIC_VUNAD, "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>; def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAQ, "vrecpe.u32", + IIC_VUNAQ, "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>; def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAD, "vrecpe.f32", + IIC_VUNAD, "vrecpe", "f32", v2f32, v2f32, int_arm_neon_vrecpe>; def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAQ, "vrecpe.f32", + IIC_VUNAQ, "vrecpe", "f32", v4f32, v4f32, int_arm_neon_vrecpe>; // VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32, - int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32, - int_arm_neon_vrecps, 1>; +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, + IIC_VRECSD, "vrecps", "f32", + v2f32, v2f32, int_arm_neon_vrecps, 1>; +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, + IIC_VRECSQ, "vrecps", "f32", + v4f32, v4f32, int_arm_neon_vrecps, 1>; // VRSQRTE : Vector Reciprocal Square Root Estimate def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAD, "vrsqrte.u32", + IIC_VUNAD, "vrsqrte", "u32", v2i32, v2i32, int_arm_neon_vrsqrte>; def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAQ, "vrsqrte.u32", + IIC_VUNAQ, "vrsqrte", "u32", v4i32, v4i32, int_arm_neon_vrsqrte>; def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, - IIC_VUNAD, "vrsqrte.f32", + IIC_VUNAD, "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>; def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, - IIC_VUNAQ, "vrsqrte.f32", + IIC_VUNAQ, "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>; // VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32, - int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32, - int_arm_neon_vrsqrts, 1>; +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, + IIC_VRECSD, "vrsqrts", "f32", + v2f32, v2f32, int_arm_neon_vrsqrts, 1>; +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, + IIC_VRECSQ, "vrsqrts", "f32", + v4f32, v4f32, int_arm_neon_vrsqrts, 1>; // Vector Shifts. 
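For the shift-by-immediate families used below, the element size is not a separate data-type field in the encoding. It is implied by how many leading bits of imm6 (Inst{21-16}) are pinned: 001xxx for 8-bit, 01xxxx for 16-bit, 1xxxxx for 32-bit, and all six bits free for 64-bit elements; the VSHLL maximum-shift forms go further and fix all of Inst{21-16}. A standalone sketch of that constraint style, using made-up names:

// Sketch: per-size defs pin the high bits of the 6-bit shift immediate.
class ShImmSketch {
  bits<6>  imm6;
  bits<32> Inst;
  let Inst{21-16} = imm6;
}
def Sh8Sketch  : ShImmSketch { let Inst{21-19} = 0b001; } // imm6 = 001xxx
def Sh16Sketch : ShImmSketch { let Inst{21-20} = 0b01;  } // imm6 = 01xxxx
def Sh32Sketch : ShImmSketch { let Inst{21}    = 0b1;   } // imm6 = 1xxxxx
def Sh64Sketch : ShImmSketch;                             // imm6 = xxxxxx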
// VSHL : Vector Shift defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>; + IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>; defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>; + IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>; // VSHLL : Vector Shift Left Long -defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>; -defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>; +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, + bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2VLSh { + : N2VLSh { let Inst{21-16} = op21_16; } -def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", +def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", v8i16, v8i8, NEONvshlli>; -def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", +def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", v4i32, v4i16, NEONvshlli>; -def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", +def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>; + IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>; defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>; + IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i", +defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", NEONvrshrn>; // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>; + IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>; defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, 
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>; + IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s", +defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", NEONvqshrns>; -defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u", +defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s", +defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>; + IIC_VSHLi4Q, "vqrshl", "s", + int_arm_neon_vqrshifts, 0>; defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; + IIC_VSHLi4Q, "vqrshl", "u", + int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s", +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", NEONvqrshrns>; -defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u", +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s", +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate -defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; -defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>; +defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>; +defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>; // VRSRA : Vector Rounding Shift Right and Accumulate -defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>; -defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>; +defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; +defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>; +defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>; // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>; +defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>; // Vector Absolute and Saturating Absolute. 
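vsli and vsri, just above, are a small exception to the Dt-prefix scheme: their suffixes are bare bit widths with no s/u/i prefix, so N2VShIns_QHSD supplies "8"/"16"/"32"/"64" itself and the call sites shrink from "vsli." to plain "vsli". A sketch of that variant; SizedSketch and ShInsSketch are illustrative names only:

// Sketch: a multiclass that pins literal size suffixes instead of extending a
// caller-supplied data-type prefix.
class SizedSketch<string opc, string dt> {
  string Opcode   = opc;
  string DataType = dt;    // bare width such as "8"
}
multiclass ShInsSketch<string OpcodeStr> {
  def v8i8  : SizedSketch<OpcodeStr, "8">;
  def v4i16 : SizedSketch<OpcodeStr, "16">;
  def v2i32 : SizedSketch<OpcodeStr, "32">;
  def v1i64 : SizedSketch<OpcodeStr, "64">;
}
defm VSLIsketch : ShInsSketch<"vsli">;  // vsli.8, vsli.16, vsli.32, vsli.64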
// VABS : Vector Absolute Value defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, - IIC_VUNAiD, IIC_VUNAiQ, "vabs.s", + IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs.f32", + IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>; def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs.f32", + IIC_VUNAQ, "vabs", "f32", v4f32, v4f32, int_arm_neon_vabs>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s", + IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", int_arm_neon_vqabs>; // Vector Negate. @@ -2377,31 +2470,31 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; -class VNEGD size, string OpcodeStr, ValueType Ty> +class VNEGD size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), - IIC_VSHLiD, OpcodeStr, "\t$dst, $src", "", + IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; -class VNEGQ size, string OpcodeStr, ValueType Ty> +class VNEGQ size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), - IIC_VSHLiD, OpcodeStr, "\t$dst, $src", "", + IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; // VNEG : Vector Negate -def VNEGs8d : VNEGD<0b00, "vneg.s8", v8i8>; -def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>; -def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>; -def VNEGs8q : VNEGQ<0b00, "vneg.s8", v16i8>; -def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>; -def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>; +def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; +def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; +def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; +def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>; +def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; +def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, - "vneg.f32", "\t$dst, $src", "", + "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, - "vneg.f32", "\t$dst, $src", "", + "vneg", "f32", "$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>; @@ -2413,35 +2506,35 @@ def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s", + IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", int_arm_neon_vqneg>; // Vector Bit Counting Operations. 
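A second encoding idiom appears in the lane-access and duplicate definitions further down: bit arguments written as {?,?} or {?,1} leave part of a field unspecified so that the element size or lane number can determine it, and only the known bits are pinned (the in-file comment "Inst{19-16} is partially specified depending on the element size" refers to this). A standalone sketch of the idea, with made-up names:

// Sketch: '?' bits stay unknown; only the size-determined bits are pinned.
class LaneBitsSketch<bits<2> op19_18, bits<2> op17_16> {
  bits<32> Inst;
  let Inst{19-18} = op19_18;
  let Inst{17-16} = op17_16;
}
def DupLane8Sketch  : LaneBitsSketch<{?,?}, {?,1}>; // only bit 16 is known
def DupLane16Sketch : LaneBitsSketch<{?,?}, {1,0}>;
def DupLane32Sketch : LaneBitsSketch<{?,1}, {0,0}>;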
// VCLS : Vector Count Leading Sign Bits defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vcls.s", + IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", int_arm_neon_vcls>; // VCLZ : Vector Count Leading Zeros defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vclz.i", + IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", int_arm_neon_vclz>; // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiD, "vcnt.8", + IIC_VCNTiD, "vcnt", "8", v8i8, v8i8, int_arm_neon_vcnt>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiQ, "vcnt.8", + IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; // Vector Move Operations. // VMOV : Vector Move (Register) -def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - IIC_VMOVD, "vmov", "\t$dst, $src", "", []>; -def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - IIC_VMOVD, "vmov", "\t$dst, $src", "", []>; +def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), + IIC_VMOVD, "vmov", "$dst, $src", "", []>; +def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), + IIC_VMOVD, "vmov", "$dst, $src", "", []>; // VMOV : Vector Move (Immediate) @@ -2482,65 +2575,65 @@ def vmovImm64 : PatLeaf<(build_vector), [{ def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, - "vmov.i8", "\t$dst, $SIMM", "", + "vmov", "i8", "$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, - "vmov.i8", "\t$dst, $SIMM", "", + "vmov", "i8", "$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm, - "vmov.i16", "\t$dst, $SIMM", "", + "vmov", "i16", "$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm, - "vmov.i16", "\t$dst, $SIMM", "", + "vmov", "i16", "$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm, - "vmov.i32", "\t$dst, $SIMM", "", + "vmov", "i32", "$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm, - "vmov.i32", "\t$dst, $SIMM", "", + "vmov", "i32", "$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, - "vmov.i64", "\t$dst, $SIMM", "", + "vmov", "i64", "$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, - "vmov.i64", "\t$dst, $SIMM", "", + "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; // VMOV : Vector Get Lane (move scalar to ARM core register) def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>; def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", 
".s16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>; def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>; def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>; def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]", [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>; // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -2581,17 +2674,17 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), let Constraints = "$src1 = $dst" in { def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>; def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>; def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2", [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>; } @@ -2655,56 +2748,57 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)), // VDUP : Vector Duplicate (from ARM core register to all elements) -class VDUPD opcod1, bits<2> opcod3, string asmSize, ValueType Ty> +class VDUPD opcod1, bits<2> opcod3, string Dt, ValueType Ty> : NVDup; -class VDUPQ opcod1, bits<2> opcod3, string asmSize, ValueType Ty> +class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> : NVDup; -def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>; -def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>; -def VDUP8q : VDUPQ<0b11101110, 0b00, ".8", v16i8>; -def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>; -def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>; +def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; +def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; +def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; +def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", "32", "$dst, $src", [(set DPR:$dst, (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", "32", "$dst, $src", [(set QPR:$dst, (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; // VDUP : Vector Duplicate Lane 

-class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
+class VDUPLND<bits<2> op19_18, bits<2> op17_16,
+              string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
         (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src[$lane]", "",
+        OpcodeStr, Dt, "$dst, $src[$lane]", "",
         [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

-class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
+class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy>
   : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
         (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src[$lane]", "",
+        OpcodeStr, Dt, "$dst, $src[$lane]", "",
         [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

 // Inst{19-16} is partially specified depending on the element size.

-def VDUPLN8d  : VDUPLND<{?,?}, {?,1}, "vdup.8", v8i8>;
-def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup.16", v4i16>;
-def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup.32", v2i32>;
-def VDUPLNfd  : VDUPLND<{?,1}, {0,0}, "vdup.32", v2f32>;
-def VDUPLN8q  : VDUPLNQ<{?,?}, {?,1}, "vdup.8", v16i8, v8i8>;
-def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup.16", v8i16, v4i16>;
-def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup.32", v4i32, v2i32>;
-def VDUPLNfq  : VDUPLNQ<{?,1}, {0,0}, "vdup.32", v4f32, v2f32>;
+def VDUPLN8d  : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>;
+def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>;
+def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>;
+def VDUPLNfd  : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>;
+def VDUPLN8q  : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>;
+def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>;
+def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>;
+def VDUPLNfq  : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>;

 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
           (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -2725,12 +2819,12 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),

 def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
                   (outs DPR:$dst), (ins SPR:$src),
-                  IIC_VMOVD, "vdup.32", "\t$dst, ${src:lane}", "",
+                  IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
                   [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;

 def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
                   (outs QPR:$dst), (ins SPR:$src),
-                  IIC_VMOVD, "vdup.32", "\t$dst, ${src:lane}", "",
+                  IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
                   [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

 def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
@@ -2743,176 +2837,178 @@ def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
           (DSubReg_f64_other_reg imm:$lane))>;

 // VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i",
-                         int_arm_neon_vmovn>;
+defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+                         "vmovn", "i", int_arm_neon_vmovn>;
 // VQMOVN : Vector Saturating Narrowing Move
-defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s",
-                           int_arm_neon_vqmovns>;
-defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u",
-                           int_arm_neon_vqmovnu>;
-defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s",
-                            int_arm_neon_vqmovnsu>;
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
+                           "vqmovn", "s", int_arm_neon_vqmovns>;
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
+                           "vqmovn", "u", int_arm_neon_vqmovnu>;
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
+                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
 // VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>;
+defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
+                          int_arm_neon_vmovls>;
+defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
+                          int_arm_neon_vmovlu>;

 // Vector Conversions.

 // VCVT : Vector Convert Between Floating-Point and Integers
-def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
-def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
-def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
-def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;
-def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
-def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
-def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
-def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
-def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32",
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                         v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
-def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32",
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                         v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
-def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32",
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
-def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32",
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
-def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32",
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                         v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
-def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32",
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                         v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
-def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32",
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
-def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32",
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

 // Vector Reverse.

 // VREV64 : Vector Reverse elements within 64-bit doublewords

-class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
         (ins DPR:$src), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src", "",
+        OpcodeStr, Dt, "$dst, $src", "",
         [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
-class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
         (ins QPR:$src), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src", "",
+        OpcodeStr, Dt, "$dst, $src", "",
         [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;

-def VREV64d8  : VREV64D<0b00, "vrev64.8", v8i8>;
-def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
-def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
-def VREV64df  : VREV64D<0b10, "vrev64.32", v2f32>;
+def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;

-def VREV64q8  : VREV64Q<0b00, "vrev64.8", v16i8>;
-def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
-def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
-def VREV64qf  : VREV64Q<0b10, "vrev64.32", v4f32>;
+def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;

 // VREV32 : Vector Reverse elements within 32-bit words

-class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
         (ins DPR:$src), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src", "",
+        OpcodeStr, Dt, "$dst, $src", "",
         [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
-class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
         (ins QPR:$src), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src", "",
+        OpcodeStr, Dt, "$dst, $src", "",
         [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;

-def VREV32d8  : VREV32D<0b00, "vrev32.8", v8i8>;
-def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;
+def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

-def VREV32q8  : VREV32Q<0b00, "vrev32.8", v16i8>;
-def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
+def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

 // VREV16 : Vector Reverse elements within 16-bit halfwords

-class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
         (ins DPR:$src), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src", "",
+        OpcodeStr, Dt, "$dst, $src", "",
         [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
-class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
         (ins QPR:$src), IIC_VMOVD,
-        OpcodeStr, "\t$dst, $src", "",
+        OpcodeStr, Dt, "$dst, $src", "",
         [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;

-def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
-def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

 // Other Vector Shuffles.

 // VEXT : Vector Extract

-class VEXTd<string OpcodeStr, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
   : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
         (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
-        OpcodeStr, "\t$dst, $lhs, $rhs, $index", "",
+        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
         [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
                                       (Ty DPR:$rhs), imm:$index)))]>;
-class VEXTq<string OpcodeStr, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
   : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
         (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
-        OpcodeStr, "\t$dst, $lhs, $rhs, $index", "",
+        OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
         [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
                                       (Ty QPR:$rhs), imm:$index)))]>;

-def VEXTd8  : VEXTd<"vext.8", v8i8>;
-def VEXTd16 : VEXTd<"vext.16", v4i16>;
-def VEXTd32 : VEXTd<"vext.32", v2i32>;
-def VEXTdf  : VEXTd<"vext.32", v2f32>;
+def VEXTd8  : VEXTd<"vext", "8", v8i8>;
+def VEXTd16 : VEXTd<"vext", "16", v4i16>;
+def VEXTd32 : VEXTd<"vext", "32", v2i32>;
+def VEXTdf  : VEXTd<"vext", "32", v2f32>;

-def VEXTq8  : VEXTq<"vext.8", v16i8>;
-def VEXTq16 : VEXTq<"vext.16", v8i16>;
-def VEXTq32 : VEXTq<"vext.32", v4i32>;
-def VEXTqf  : VEXTq<"vext.32", v4f32>;
+def VEXTq8  : VEXTq<"vext", "8", v16i8>;
+def VEXTq16 : VEXTq<"vext", "16", v8i16>;
+def VEXTq32 : VEXTq<"vext", "32", v4i32>;
+def VEXTqf  : VEXTq<"vext", "32", v4f32>;

 // VTRN : Vector Transpose

-def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
-def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
-def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">;
+def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

-def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
-def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
-def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;
+def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

 // VUZP : Vector Unzip (Deinterleave)

-def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
-def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
-def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">;
+def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

-def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
-def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
-def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;
+def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

 // VZIP : Vector Zip (Interleave)

-def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip.8">;
-def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">;
-def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">;
+def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

-def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
-def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
-def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
+def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

 // Vector Table Lookup and Table Extension.

@@ -2920,25 +3016,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
 def VTBL1
   : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
         (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
-        "vtbl.8", "\t$dst, \\{$tbl1\\}, $src", "",
+        "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
         [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
 let hasExtraSrcRegAllocReq = 1 in {
 def VTBL2
   : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
         (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
-        "vtbl.8", "\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
+        "vtbl", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "",
         [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
                                DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
 def VTBL3
   : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
         (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
-        "vtbl.8", "\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
+        "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
         [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
                                DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
 def VTBL4
   : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
         (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
-        "vtbl.8", "\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
+        "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
         [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
                                DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
 } // hasExtraSrcRegAllocReq = 1
@@ -2947,26 +3043,26 @@ def VTBX4
 def VTBX1
   : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
         (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
-        "vtbx.8", "\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
+        "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
 let hasExtraSrcRegAllocReq = 1 in {
 def VTBX2
   : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
         (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
-        "vtbx.8", "\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
+        "vtbx", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
         [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
                                DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
 def VTBX3
   : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
         (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
-        "vtbx.8", "\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
+        "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
         [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
                                DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
 def VTBX4
   : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
         DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
-        "vtbx.8", "\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
+        "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
         [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
                                DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
 } // hasExtraSrcRegAllocReq = 1
@@ -2980,17 +3076,17 @@ def VTBX4

 // Vector Add Operations used for single-precision FP
 let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>;
+def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>;
 def : N3VDsPat<fadd, VADDfd_sfp>;

 // Vector Sub Operations used for single-precision FP
 let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>;
+def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>;
 def : N3VDsPat<fsub, VSUBfd_sfp>;

 // Vector Multiply Operations used for single-precision FP
 let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
+def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>;
 def : N3VDsPat<fmul, VMULfd_sfp>;

 // Vector Multiply-Accumulate/Subtract used for single-precision FP
@@ -2998,17 +3094,17 @@ def : N3VDsPat<fmul, VMULfd_sfp>;
 // we want to avoid them for now. e.g., alternating vmla/vadd instructions.

 //let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>;
 //def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;

 //let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>;
 //def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;

 // Vector Absolute used for single-precision FP
 let neverHasSideEffects = 1 in
 def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
-                          IIC_VUNAD, "vabs.f32",
+                          IIC_VUNAD, "vabs", "f32",
                           v2f32, v2f32, int_arm_neon_vabs>;
 def : N2VDIntsPat<fabs, VABSfd_sfp>;

@@ -3016,27 +3112,27 @@ def : N2VDIntsPat<fabs, VABSfd_sfp>;
 let neverHasSideEffects = 1 in
 def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
-                       "vneg.f32", "\t$dst, $src", "", []>;
+                       "vneg", "f32", "$dst, $src", "", []>;
 def : N2VDIntsPat<fneg, VNEGf32d_sfp>;

 // Vector Convert between single-precision FP and integer
 let neverHasSideEffects = 1 in
-def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                          v2i32, v2f32, fp_to_sint>;
 def : N2VDsPat<arm_ftosi, f32, v2i32, VCVTf2sd_sfp>;

 let neverHasSideEffects = 1 in
-def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                          v2i32, v2f32, fp_to_uint>;
 def : N2VDsPat<arm_ftoui, f32, v2i32, VCVTf2ud_sfp>;

 let neverHasSideEffects = 1 in
-def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                          v2f32, v2i32, sint_to_fp>;
 def : N2VDsPat<arm_sitof, f32, v2f32, VCVTs2fd_sfp>;

 let neverHasSideEffects = 1 in
-def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                          v2f32, v2i32, uint_to_fp>;
 def : N2VDsPat<arm_uitof, f32, v2f32, VCVTu2fd_sfp>;