X86: add assembler-only AVX (VEX-encoded) forms of the SSSE3 instructions.

  * X86InstrSSE.td: instantiate the existing SS3I_unop_rm_int and
    SS3I_binop_rm_int multiclasses for the v-prefixed mnemonics, and fold the
    four PALIGNR definitions into a new sse3_palign multiclass that is
    instantiated once in 3-operand form (VPALIGN, Is2Addr = 0) and once in
    tied 2-operand form (PALIGN, default Is2Addr = 1).
  * X86MCCodeEmitter.cpp: only emit the 2-byte (0xC5) VEX prefix when
    VEX_5M == 1; the new tests expect the 3-byte 0xC4 form for these
    instructions.
  * x86_32-encoding.s / x86_64-encoding.s: encoding tests for every new form.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Hunk line counts were re-validated, but leading indentation and
blank context lines are reconstructed; use --ignore-whitespace if a context
line fails to match.

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index a06b72a25178..31cf1963b801 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3380,6 +3380,18 @@ multiclass SS3I_unop_rm_int opc, string OpcodeStr,
                        (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
 }
 
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+  defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
+                                 int_x86_ssse3_pabs_b,
+                                 int_x86_ssse3_pabs_b_128>, VEX;
+  defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
+                                 int_x86_ssse3_pabs_w,
+                                 int_x86_ssse3_pabs_w_128>, VEX;
+  defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
+                                 int_x86_ssse3_pabs_d,
+                                 int_x86_ssse3_pabs_d_128>, VEX;
+}
+
 defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
                               int_x86_ssse3_pabs_b,
                               int_x86_ssse3_pabs_b_128>;
@@ -3433,6 +3445,47 @@ multiclass SS3I_binop_rm_int opc, string OpcodeStr,
                        (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
 }
 
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
+let isCommutable = 0 in {
+  defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
+                                   int_x86_ssse3_phadd_w,
+                                   int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
+  defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
+                                   int_x86_ssse3_phadd_d,
+                                   int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
+  defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
+                                    int_x86_ssse3_phadd_sw,
+                                    int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
+  defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
+                                   int_x86_ssse3_phsub_w,
+                                   int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
+  defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
+                                   int_x86_ssse3_phsub_d,
+                                   int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
+  defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
+                                    int_x86_ssse3_phsub_sw,
+                                    int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
+  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
+                                      int_x86_ssse3_pmadd_ub_sw,
+                                      int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
+  defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
+                                   int_x86_ssse3_pshuf_b,
+                                   int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
+  defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
+                                   int_x86_ssse3_psign_b,
+                                   int_x86_ssse3_psign_b_128, 0>, VEX_4V;
+  defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
+                                   int_x86_ssse3_psign_w,
+                                   int_x86_ssse3_psign_w_128, 0>, VEX_4V;
+  defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
+                                   int_x86_ssse3_psign_d,
+                                   int_x86_ssse3_psign_d_128, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
+                                   int_x86_ssse3_pmul_hr_sw,
+                                   int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
+}
+
 // None of these have i8 immediate fields.
 let ImmT = NoImm, Constraints = "$src1 = $dst" in {
 let isCommutable = 0 in {
@@ -3484,26 +3537,43 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
 // SSSE3 - Packed Align Instruction Patterns
 //===---------------------------------------------------------------------===//
 
-let Constraints = "$src1 = $dst" in {
-  def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
-                          (ins VR64:$src1, VR64:$src2, i8imm:$src3),
-                          "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                          []>;
-  def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
-                          (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
-                          "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                          []>;
+multiclass sse3_palign<string asm, bit Is2Addr = 1> {
+  def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+      (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+      !if(Is2Addr,
+        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+        !strconcat(asm,
+                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+      []>;
+  def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+      (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+      !if(Is2Addr,
+        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+        !strconcat(asm,
+                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+      []>;
 
-  def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
-                           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
-                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                           []>, OpSize;
-  def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
-                           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
-                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                           []>, OpSize;
+  def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+      !if(Is2Addr,
+        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+        !strconcat(asm,
+                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+      []>, OpSize;
+  def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+      !if(Is2Addr,
+        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+        !strconcat(asm,
+                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+      []>, OpSize;
 }
 
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in
+  defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+  defm PALIGN : sse3_palign<"palignr">;
+
 let AddedComplexity = 5 in {
 
 def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
diff --git a/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
index 9f36aaa28c7c..f60e73968dc0 100644
--- a/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -543,7 +543,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   //
   unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
 
-  if (VEX_B && VEX_X && !VEX_W) { // 2 byte VEX prefix
+  if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
     EmitByte(0xC5, CurByte, OS);
     EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
     return;
diff --git a/llvm/test/MC/AsmParser/X86/x86_32-encoding.s b/llvm/test/MC/AsmParser/X86/x86_32-encoding.s
index b2789cda3f17..6854d40f2301 100644
--- a/llvm/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/llvm/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -11614,3 +11614,131 @@
 // CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
           vhsubpd (%eax), %xmm2, %xmm3
 
+// CHECK: vpabsb %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1]
+          vpabsb %xmm1, %xmm2
+
+// CHECK: vpabsb (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10]
+          vpabsb (%eax), %xmm2
+
+// CHECK: vpabsw %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1]
+          vpabsw %xmm1, %xmm2
+
+// CHECK: vpabsw (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10]
+          vpabsw (%eax), %xmm2
+
+// CHECK: vpabsd %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1]
+          vpabsd %xmm1, %xmm2
+
+// CHECK: vpabsd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10]
+          vpabsd (%eax), %xmm2
+
+// CHECK: vphaddw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9]
+          vphaddw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18]
+          vphaddw (%eax), %xmm2, %xmm3
+
+// CHECK: vphaddd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9]
+          vphaddd %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18]
+          vphaddd (%eax), %xmm2, %xmm3
+
+// CHECK: vphaddsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9]
+          vphaddsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18]
+          vphaddsw (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9]
+          vphsubw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18]
+          vphsubw (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9]
+          vphsubd %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18]
+          vphsubd (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9]
+          vphsubsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18]
+          vphsubsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9]
+          vpmaddubsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18]
+          vpmaddubsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpshufb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9]
+          vpshufb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpshufb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18]
+          vpshufb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9]
+          vpsignb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18]
+          vpsignb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9]
+          vpsignw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18]
+          vpsignw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9]
+          vpsignd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18]
+          vpsignd (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9]
+          vpmulhrsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18]
+          vpmulhrsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07]
+          vpalignr $7, %xmm1, %xmm2, %xmm3
+
+// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07]
+          vpalignr $7, (%eax), %xmm2, %xmm3
+
diff --git a/llvm/test/MC/AsmParser/X86/x86_64-encoding.s b/llvm/test/MC/AsmParser/X86/x86_64-encoding.s
index 8be9d67d9928..af33f74ed8a4 100644
--- a/llvm/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/llvm/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -1662,3 +1662,131 @@ pshufb CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc5,0x19,0x7d,0x28]
           vhsubpd (%rax), %xmm12, %xmm13
 
+// CHECK: vpabsb %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3]
+          vpabsb %xmm11, %xmm12
+
+// CHECK: vpabsb (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20]
+          vpabsb (%rax), %xmm12
+
+// CHECK: vpabsw %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3]
+          vpabsw %xmm11, %xmm12
+
+// CHECK: vpabsw (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20]
+          vpabsw (%rax), %xmm12
+
+// CHECK: vpabsd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3]
+          vpabsd %xmm11, %xmm12
+
+// CHECK: vpabsd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20]
+          vpabsd (%rax), %xmm12
+
+// CHECK: vphaddw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb]
+          vphaddw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28]
+          vphaddw (%rax), %xmm12, %xmm13
+
+// CHECK: vphaddd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb]
+          vphaddd %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28]
+          vphaddd (%rax), %xmm12, %xmm13
+
+// CHECK: vphaddsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb]
+          vphaddsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28]
+          vphaddsw (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb]
+          vphsubw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28]
+          vphsubw (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb]
+          vphsubd %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28]
+          vphsubd (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb]
+          vphsubsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28]
+          vphsubsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb]
+          vpmaddubsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28]
+          vpmaddubsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpshufb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb]
+          vpshufb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpshufb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28]
+          vpshufb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb]
+          vpsignb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28]
+          vpsignb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb]
+          vpsignw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28]
+          vpsignw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb]
+          vpsignd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28]
+          vpsignd (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb]
+          vpmulhrsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28]
+          vpmulhrsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07]
+          vpalignr $7, %xmm11, %xmm12, %xmm13
+
+// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07]
+          vpalignr $7, (%rax), %xmm12, %xmm13
+