- Add support for the rest of the AVX forms of the SSSE3 instructions

- Fix VEX prefix emission: use the 3-byte form whenever VEX_5M
represents a REX-equivalent two-byte leading opcode

llvm-svn: 107523
This commit is contained in:
Bruno Cardoso Lopes 2010-07-02 22:06:54 +00:00
parent 85d6948f3d
commit c7111fd355
4 changed files with 344 additions and 18 deletions

View File

@ -3380,6 +3380,18 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (mem_frag128 addr:$src))))]>, OpSize;
}
// VEX-encoded ("v"-prefixed) packed absolute value instructions:
// VPABSB (0x1C), VPABSW (0x1D) and VPABSD (0x1E). Each one instantiates
// SS3I_unop_rm_int with the ssse3 pabs intrinsics (the same opcode and
// intrinsics the non-VEX PABSB definition below uses for 0x1C) and adds
// the VEX encoding class.
// Every record in the group is marked isAsmParserOnly = 1.
// NOTE(review): the predicate list names HasSSE3 although the intrinsics
// are SSSE3 ones -- confirm whether an SSSE3 predicate was intended.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
int_x86_ssse3_pabs_b,
int_x86_ssse3_pabs_b_128>, VEX;
defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
int_x86_ssse3_pabs_w,
int_x86_ssse3_pabs_w_128>, VEX;
defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
int_x86_ssse3_pabs_d,
int_x86_ssse3_pabs_d_128>, VEX;
}
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
int_x86_ssse3_pabs_b,
int_x86_ssse3_pabs_b_128>;
@ -3433,6 +3445,47 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
// VEX_4V-encoded ("v"-prefixed) binary instructions built from
// SS3I_binop_rm_int: horizontal add/subtract, pmaddubsw, pshufb, psign and
// pmulhrsw. Every record in the group is marked isAsmParserOnly = 1.
// The trailing 0 template argument presumably selects the non-two-address
// (separate destination operand) form -- TODO confirm against the
// SS3I_binop_rm_int multiclass signature.
// NOTE(review): the predicate list names HasSSE3 although the intrinsics
// are SSSE3 ones -- confirm whether an SSSE3 predicate was intended.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in {
// Everything inside this inner group is explicitly non-commutable.
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
int_x86_ssse3_phadd_w,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
int_x86_ssse3_phadd_d,
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
int_x86_ssse3_phadd_sw,
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
int_x86_ssse3_phsub_w,
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
int_x86_ssse3_phsub_d,
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
int_x86_ssse3_phsub_sw,
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
int_x86_ssse3_pmadd_ub_sw,
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
int_x86_ssse3_pshuf_b,
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
int_x86_ssse3_psign_b,
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
int_x86_ssse3_psign_w,
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
int_x86_ssse3_psign_d,
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
// VPMULHRSW sits outside the isCommutable = 0 group above, so it keeps
// whatever commutability the multiclass assigns by default.
defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
int_x86_ssse3_pmul_hr_sw,
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
@ -3484,26 +3537,43 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
multiclass sse3_palign<string asm, bit Is2Addr = 1> {
def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>;
def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>;
def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>, OpSize;
def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>, OpSize;
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>, OpSize;
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>, OpSize;
}
// Instantiate sse3_palign twice.
// VPALIGN: VEX_4V form, marked isAsmParserOnly. Is2Addr = 0 selects the
// asm string that spells out $src1 as a separate operand
// ("$src3, $src2, $src1, $dst").
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE3] in
defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
// PALIGN: uses the default Is2Addr = 1 asm string (no separate $src1), with
// $src1 tied to $dst through the constraint.
let Constraints = "$src1 = $dst" in
defm PALIGN : sse3_palign<"palignr">;
let AddedComplexity = 5 in {
def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),

View File

@ -543,7 +543,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
if (VEX_B && VEX_X && !VEX_W) { // 2 byte VEX prefix
if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;

View File

@ -11614,3 +11614,131 @@
// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
vhsubpd (%eax), %xmm2, %xmm3
// CHECK: vpabsb %xmm1, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1]
vpabsb %xmm1, %xmm2
// CHECK: vpabsb (%eax), %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10]
vpabsb (%eax), %xmm2
// CHECK: vpabsw %xmm1, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1]
vpabsw %xmm1, %xmm2
// CHECK: vpabsw (%eax), %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10]
vpabsw (%eax), %xmm2
// CHECK: vpabsd %xmm1, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1]
vpabsd %xmm1, %xmm2
// CHECK: vpabsd (%eax), %xmm2
// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10]
vpabsd (%eax), %xmm2
// CHECK: vphaddw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9]
vphaddw %xmm1, %xmm2, %xmm3
// CHECK: vphaddw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18]
vphaddw (%eax), %xmm2, %xmm3
// CHECK: vphaddd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9]
vphaddd %xmm1, %xmm2, %xmm3
// CHECK: vphaddd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18]
vphaddd (%eax), %xmm2, %xmm3
// CHECK: vphaddsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9]
vphaddsw %xmm1, %xmm2, %xmm3
// CHECK: vphaddsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18]
vphaddsw (%eax), %xmm2, %xmm3
// CHECK: vphsubw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9]
vphsubw %xmm1, %xmm2, %xmm3
// CHECK: vphsubw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18]
vphsubw (%eax), %xmm2, %xmm3
// CHECK: vphsubd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9]
vphsubd %xmm1, %xmm2, %xmm3
// CHECK: vphsubd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18]
vphsubd (%eax), %xmm2, %xmm3
// CHECK: vphsubsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9]
vphsubsw %xmm1, %xmm2, %xmm3
// CHECK: vphsubsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18]
vphsubsw (%eax), %xmm2, %xmm3
// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9]
vpmaddubsw %xmm1, %xmm2, %xmm3
// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18]
vpmaddubsw (%eax), %xmm2, %xmm3
// CHECK: vpshufb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9]
vpshufb %xmm1, %xmm2, %xmm3
// CHECK: vpshufb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18]
vpshufb (%eax), %xmm2, %xmm3
// CHECK: vpsignb %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9]
vpsignb %xmm1, %xmm2, %xmm3
// CHECK: vpsignb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18]
vpsignb (%eax), %xmm2, %xmm3
// CHECK: vpsignw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9]
vpsignw %xmm1, %xmm2, %xmm3
// CHECK: vpsignw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18]
vpsignw (%eax), %xmm2, %xmm3
// CHECK: vpsignd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9]
vpsignd %xmm1, %xmm2, %xmm3
// CHECK: vpsignd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18]
vpsignd (%eax), %xmm2, %xmm3
// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9]
vpmulhrsw %xmm1, %xmm2, %xmm3
// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18]
vpmulhrsw (%eax), %xmm2, %xmm3
// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07]
vpalignr $7, %xmm1, %xmm2, %xmm3
// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07]
vpalignr $7, (%eax), %xmm2, %xmm3

View File

@ -1662,3 +1662,131 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
vhsubpd (%rax), %xmm12, %xmm13
// CHECK: vpabsb %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3]
vpabsb %xmm11, %xmm12
// CHECK: vpabsb (%rax), %xmm12
// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20]
vpabsb (%rax), %xmm12
// CHECK: vpabsw %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3]
vpabsw %xmm11, %xmm12
// CHECK: vpabsw (%rax), %xmm12
// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20]
vpabsw (%rax), %xmm12
// CHECK: vpabsd %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3]
vpabsd %xmm11, %xmm12
// CHECK: vpabsd (%rax), %xmm12
// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20]
vpabsd (%rax), %xmm12
// CHECK: vphaddw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb]
vphaddw %xmm11, %xmm12, %xmm13
// CHECK: vphaddw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28]
vphaddw (%rax), %xmm12, %xmm13
// CHECK: vphaddd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb]
vphaddd %xmm11, %xmm12, %xmm13
// CHECK: vphaddd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28]
vphaddd (%rax), %xmm12, %xmm13
// CHECK: vphaddsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb]
vphaddsw %xmm11, %xmm12, %xmm13
// CHECK: vphaddsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28]
vphaddsw (%rax), %xmm12, %xmm13
// CHECK: vphsubw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb]
vphsubw %xmm11, %xmm12, %xmm13
// CHECK: vphsubw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28]
vphsubw (%rax), %xmm12, %xmm13
// CHECK: vphsubd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb]
vphsubd %xmm11, %xmm12, %xmm13
// CHECK: vphsubd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28]
vphsubd (%rax), %xmm12, %xmm13
// CHECK: vphsubsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb]
vphsubsw %xmm11, %xmm12, %xmm13
// CHECK: vphsubsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28]
vphsubsw (%rax), %xmm12, %xmm13
// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb]
vpmaddubsw %xmm11, %xmm12, %xmm13
// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28]
vpmaddubsw (%rax), %xmm12, %xmm13
// CHECK: vpshufb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb]
vpshufb %xmm11, %xmm12, %xmm13
// CHECK: vpshufb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28]
vpshufb (%rax), %xmm12, %xmm13
// CHECK: vpsignb %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb]
vpsignb %xmm11, %xmm12, %xmm13
// CHECK: vpsignb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28]
vpsignb (%rax), %xmm12, %xmm13
// CHECK: vpsignw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb]
vpsignw %xmm11, %xmm12, %xmm13
// CHECK: vpsignw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28]
vpsignw (%rax), %xmm12, %xmm13
// CHECK: vpsignd %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb]
vpsignd %xmm11, %xmm12, %xmm13
// CHECK: vpsignd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28]
vpsignd (%rax), %xmm12, %xmm13
// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb]
vpmulhrsw %xmm11, %xmm12, %xmm13
// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28]
vpmulhrsw (%rax), %xmm12, %xmm13
// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07]
vpalignr $7, %xmm11, %xmm12, %xmm13
// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07]
vpalignr $7, (%rax), %xmm12, %xmm13