From 15c97706e8c4cb89522ae2dece5da6797f88db7a Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 21 Oct 2010 18:09:17 +0000 Subject: [PATCH] Provide correct NEON encodings for vaddl.u* and vaddl.s*. llvm-svn: 117039 --- llvm/lib/Target/ARM/ARMInstrNEON.td | 47 +++++++++++++------- llvm/test/MC/ARM/neon-fp-encoding.ll | 65 ++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index b9e819403e08..7a63085a9d0b 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -1232,23 +1232,23 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + (outs QPR:$Qd), (ins QPR:$Qn, QPR:$Qm), N3RegFrm, itin, + OpcodeStr, Dt, "$Qd, $Qn, $Qm", "", + [(set QPR:$Qd, (ResTy (OpNode (OpTy QPR:$Qn), (OpTy QPR:$Qm))))]> { let isCommutable = Commutable; - bits<4> Dd; - bits<4> Dn; - bits<4> Dm; + bits<4> Qd; + bits<4> Qn; + bits<4> Qm; - let Inst{15-13} = Dd{2-0}; - let Inst{22} = Dd{3}; + let Inst{15-13} = Qd{2-0}; + let Inst{22} = Qd{3}; let Inst{12} = 0; - let Inst{19-17} = Dn{2-0}; - let Inst{7} = Dn{3}; + let Inst{19-17} = Qn{2-0}; + let Inst{7} = Qn{3}; let Inst{16} = 0; - let Inst{3-1} = Dm{2-0}; - let Inst{5} = Dm{3}; + let Inst{3-1} = Qm{2-0}; + let Inst{5} = Qm{3}; let Inst{0} = 0; } class N3VQX op21_20, bits<4> op11_8, bit op4, @@ -1597,10 +1597,25 @@ class N3VLExt op21_20, bits<4> op11_8, bit op4, ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, bit Commutable> : N3V { + (outs QPR:$Qd), (ins DPR:$Dn, DPR:$Dm), N3RegFrm, itin, + OpcodeStr, Dt, "$Qd, $Dn, $Dm", "", + [(set QPR:$Qd, (OpNode (TyQ (ExtOp (TyD DPR:$Dn))), + (TyQ (ExtOp (TyD DPR:$Dm)))))]> { + let isCommutable = Commutable; + + // Instruction operands. + bits<4> Qd; + bits<5> Dn; + bits<5> Dm; + + let Inst{15-13} = Qd{2-0}; + let Inst{22} = Qd{3}; + let Inst{12} = 0; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let isCommutable = Commutable; } diff --git a/llvm/test/MC/ARM/neon-fp-encoding.ll b/llvm/test/MC/ARM/neon-fp-encoding.ll index 3da1ec9d5af9..528803643393 100644 --- a/llvm/test/MC/ARM/neon-fp-encoding.ll +++ b/llvm/test/MC/ARM/neon-fp-encoding.ll @@ -54,3 +54,68 @@ define <4 x float> @vadd_4xfloat(<4 x float>* %A, <4 x float>* %B) nounwind { ret <4 x float> %tmp3 } +; CHECK: vaddls_8xi8 +define <8 x i16> @vaddls_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> +; CHECK: vaddl.s8 q8, d17, d16 @ encoding: [0xa0,0x00,0xc1,0xf2] + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +; CHECK: vaddls_4xi16 +define <4 x i32> @vaddls_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> +; CHECK: vaddl.s16 q8, d17, d16 @ encoding: [0xa0,0x00,0xd1,0xf2] + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +; CHECK: vaddls_2xi32 +define <2 x i64> @vaddls_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> +; CHECK: vaddl.s32 q8, d17, d16 @ encoding: [0xa0,0x00,0xe1,0xf2] + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +; CHECK: vaddlu_8xi8 +define <8 x i16> @vaddlu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> +; CHECK: vaddl.u8 q8, d17, d16 @ encoding: [0xa0,0x00,0xc1,0xf3] + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +; CHECK: vaddlu_4xi16 +define <4 x i32> @vaddlu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> +; CHECK: vaddl.u16 q8, d17, d16 @ encoding: [0xa0,0x00,0xd1,0xf3] + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +; CHECK: vaddlu_2xi32 +define <2 x i64> @vaddlu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> +; CHECK: vaddl.u32 q8, d17, d16 @ encoding: [0xa0,0x00,0xe1,0xf3] + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +}