diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 01dfc12d039e..32730855734d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -256,10 +256,10 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), let isAsmParserOnly = 1 in { def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>, XS, VEX_4V; + [(store FR32:$src, addr:$dst)]>, XS, VEX; def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>, XD, VEX_4V; + [(store FR64:$src, addr:$dst)]>, XD, VEX; } // Extract and store. @@ -5018,4 +5018,27 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +// Conditional SIMD Packed Loads and Stores +multiclass avx_movmask_rm opc_rm, bits<8> opc_mr, string OpcodeStr> { + def rm : AVX8I, VEX_4V; + def Yrm : AVX8I, VEX_4V; + def mr : AVX8I, VEX_4V; + def Ymr : AVX8I, VEX_4V; +} + +defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps">; +defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd">; + } // isAsmParserOnly diff --git a/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/X86MCCodeEmitter.cpp index 23b0666f5f30..230c9d041807 100644 --- a/llvm/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/X86MCCodeEmitter.cpp @@ -469,30 +469,36 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, unsigned NumOps = MI.getNumOperands(); unsigned CurOp = 0; + bool IsDestMem = false; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMDestMem: + IsDestMem = true; + // The important info for the VEX prefix is never beyond the address + // registers. Don't check beyond that. 
+ NumOps = CurOp = X86::AddrNumOperands; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: - case X86II::MRMDestMem: - NumOps = CurOp = X86::AddrNumOperands; case X86II::MRMSrcMem: case X86II::MRMSrcReg: if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; - - // CurOp and NumOps are equal when VEX_R represents a register used - // to index a memory destination (which is the last operand) - CurOp = (CurOp == NumOps) ? 0 : CurOp+1; + CurOp++; if (HasVEX_4V) { - VEX_4V = getVEXRegisterEncoding(MI, CurOp); + VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp); CurOp++; } + // To only check operands before the memory address ones, start + // the search from the beginning + if (IsDestMem) + CurOp = 0; + // If the last register should be encoded in the immediate field // do not use any bit from VEX prefix to this register, ignore it if (TSFlags & X86II::VEX_I8IMM) @@ -833,10 +839,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + X86::AddrNumOperands; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + EmitMemModRMByte(MI, CurOp, - GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)), + GetX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, CurByte, OS, Fixups); - CurOp += X86::AddrNumOperands + 1; + CurOp = SrcRegNum + 1; break; case X86II::MRMSrcReg: diff --git a/llvm/test/MC/AsmParser/X86/x86_32-encoding.s b/llvm/test/MC/AsmParser/X86/x86_32-encoding.s index 495e940c0f05..daa8cb583b8f 100644 --- a/llvm/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/llvm/test/MC/AsmParser/X86/x86_32-encoding.s @@ -13030,3 +13030,35 @@ // CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07] vextractf128 $7, %ymm2, (%eax) +// CHECK: vmaskmovpd %xmm2, %xmm5, (%eax) +// 
CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10] + vmaskmovpd %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovpd %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10] + vmaskmovpd %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovpd (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28] + vmaskmovpd (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovpd (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28] + vmaskmovpd (%eax), %ymm2, %ymm5 + +// CHECK: vmaskmovps %xmm2, %xmm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10] + vmaskmovps %xmm2, %xmm5, (%eax) + +// CHECK: vmaskmovps %ymm2, %ymm5, (%eax) +// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10] + vmaskmovps %ymm2, %ymm5, (%eax) + +// CHECK: vmaskmovps (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28] + vmaskmovps (%eax), %xmm2, %xmm5 + +// CHECK: vmaskmovps (%eax), %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28] + vmaskmovps (%eax), %ymm2, %ymm5 + diff --git a/llvm/test/MC/AsmParser/X86/x86_64-encoding.s b/llvm/test/MC/AsmParser/X86/x86_64-encoding.s index bf232abe2a22..5affb4e0d69f 100644 --- a/llvm/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/llvm/test/MC/AsmParser/X86/x86_64-encoding.s @@ -3104,3 +3104,35 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07] vextractf128 $7, %ymm12, (%rax) +// CHECK: vmaskmovpd %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20] + vmaskmovpd %xmm12, %xmm10, (%rax) + +// CHECK: vmaskmovpd %ymm12, %ymm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x2d,0x2f,0x20] + vmaskmovpd %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovpd (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10] + vmaskmovpd (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovpd (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10] + vmaskmovpd (%rax), %ymm12, %ymm10 + +// CHECK: vmaskmovps %xmm12, %xmm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20] + vmaskmovps %xmm12, %xmm10, 
(%rax) + +// CHECK: vmaskmovps %ymm12, %ymm10, (%rax) +// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20] + vmaskmovps %ymm12, %ymm10, (%rax) + +// CHECK: vmaskmovps (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10] + vmaskmovps (%rax), %xmm12, %xmm10 + +// CHECK: vmaskmovps (%rax), %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10] + vmaskmovps (%rax), %ymm12, %ymm10 +