[X86] Adding support for missing variations of X86 string related instructions

The following are legal according to X86 spec:
ins mem, DX
outs DX, mem
lods mem
stos mem
scas mem
cmps mem, mem
movs mem, mem

Differential Revision: http://reviews.llvm.org/D14827

llvm-svn: 258132
This commit is contained in:
Marina Yatsina 2016-01-19 15:37:56 +00:00
parent b5d539380a
commit b9f4f62cfe
4 changed files with 243 additions and 75 deletions

View File

@ -683,9 +683,14 @@ private:
std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
void AddDefaultSrcDestOperands(
OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
bool IsSIReg(unsigned Reg);
unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
void
AddDefaultSrcDestOperands(OperandVector &Operands,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
OperandVector &FinalOperands);
std::unique_ptr<X86Operand> ParseOperand();
std::unique_ptr<X86Operand> ParseATTOperand();
std::unique_ptr<X86Operand> ParseIntelOperand();
@ -747,11 +752,6 @@ private:
bool OmitRegisterFromClobberLists(unsigned RegNo) override;
/// doSrcDstMatch - Returns true if operands are matching in their
/// word size (%si and %di, %esi and %edi, etc.). Order depends on
/// the parsing mode (Intel vs. AT&T).
bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
/// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
/// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
/// \return \c true if no parsing errors occurred, \c false otherwise.
@ -867,27 +867,6 @@ static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
return false;
}
/// Check that the base registers of two memory operands have the same width.
///
/// The second operand's base register selects the width class (16, 32 or
/// 64 bit, tried in that order); the result is whether the first operand's
/// base register is in that same class. Which operand is the source and which
/// the destination depends on the syntax being parsed, and does not matter
/// for the comparison.
///
/// \return true when the widths agree, and also when the check does not
///         apply (either operand is not memory, or the second base register
///         is in none of the three classes) so that the regular operand
///         diagnostics are produced instead.
bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2) {
  // Only two memory operands can be compared; anything else is left for the
  // normal "bogus operands" complaint.
  const bool BothAreMemory = Op1.isMem() && Op2.isMem();
  if (!BothAreMemory)
    return true;

  const unsigned FirstBase = Op1.Mem.BaseReg;
  const unsigned SecondBase = Op2.Mem.BaseReg;

  // Probe the width classes in the same order as before: 16, 32, then 64 bit.
  static const unsigned WidthClassIDs[] = {
      X86::GR16RegClassID, X86::GR32RegClassID, X86::GR64RegClassID};
  for (unsigned ClassID : WidthClassIDs) {
    const auto &WidthClass = X86MCRegisterClasses[ClassID];
    if (WidthClass.contains(SecondBase))
      return WidthClass.contains(FirstBase);
  }

  // Unknown width: report a match and let another error happen.
  return true;
}
bool X86AsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
@ -1025,6 +1004,37 @@ std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
Loc, Loc, 0);
}
/// Tell whether \p Reg belongs to the source-index register family.
///
/// \param Reg  Register to classify; only (R|E)SI and (R|E)DI are expected.
/// \return true for RSI/ESI/SI, false for RDI/EDI/DI. Any other register
///         trips the assertion in asserts-enabled builds and yields false
///         otherwise.
bool X86AsmParser::IsSIReg(unsigned Reg) {
  switch (Reg) {
  default:
    // A bare assert("text") tests a non-null string pointer and can never
    // fail; `false &&` makes the check actually fire on unexpected input.
    assert(false && "Only (R|E)SI and (R|E)DI are expected!");
    return false;
  case X86::RSI:
  case X86::ESI:
  case X86::SI:
    return true;
  case X86::RDI:
  case X86::EDI:
  case X86::DI:
    return false;
  }
}
/// Select the SI- or DI-family register whose width matches a register class.
///
/// \param RegClassID  One of X86::GR64RegClassID, X86::GR32RegClassID or
///                    X86::GR16RegClassID.
/// \param Reg         Fallback returned unchanged for any other class ID.
/// \param IsSIReg     true selects RSI/ESI/SI, false selects RDI/EDI/DI.
/// \return The selected index register, or \p Reg for an unrecognised class.
unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
                                          bool IsSIReg) {
  unsigned Selected = Reg;
  switch (RegClassID) {
  case X86::GR64RegClassID:
    Selected = IsSIReg ? X86::RSI : X86::RDI;
    break;
  case X86::GR32RegClassID:
    Selected = IsSIReg ? X86::ESI : X86::EDI;
    break;
  case X86::GR16RegClassID:
    Selected = IsSIReg ? X86::SI : X86::DI;
    break;
  default:
    // NOTE(review): asserting on a string literal is always true, so this
    // never fires. The arm appears reachable when the caller has not
    // recognised any register class; confirm callers before making it trap.
    assert("Unexpected register class");
    break;
  }
  return Selected;
}
void X86AsmParser::AddDefaultSrcDestOperands(
OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
@ -1038,6 +1048,76 @@ void X86AsmParser::AddDefaultSrcDestOperands(
}
}
/// Reconcile the operands written for a string instruction with its default
/// (implicit) operands and install the result in \p OrigOperands.
///
/// When the user wrote no operands (only the mnemonic is present), the
/// defaults are appended as-is. Otherwise every default operand is checked
/// against the written one at the same position:
///  - a default register operand must be matched by the same register;
///  - a default memory operand must be matched by a memory operand whose base
///    register is a 16-, 32- or 64-bit GPR, and all written base registers
///    must share one width. The default operand then takes the written
///    operand's size and segment, and its base becomes the SI/DI register of
///    that width.
/// A warning is issued for each memory operand whose written base register is
/// not the register that will be used. Warnings are held back until every
/// operand has been accepted, so instructions that merely share a mnemonic
/// with a string instruction do not produce them.
///
/// \param OrigOperands   Parsed operands; element 0 is the instruction name.
///                       On success the written operands are replaced by the
///                       adjusted defaults.
/// \param FinalOperands  Default operands for the instruction; adjusted in
///                       place and moved from.
/// \return The result of Error() when the written base registers have
///         mismatching widths (the caller treats a true result as a parse
///         failure); false otherwise. On a false return caused by an operand
///         that does not fit the string-instruction form, \p OrigOperands is
///         left untouched so the normal operand diagnostics apply.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {
  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Opernand size mismatch");

    // Indices of memory operands whose written base register is ignored.
    SmallVector<unsigned, 2> RelocatedOps;

    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {
        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all
        // register bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // The base register is not a 16/32/64-bit GPR, so there is no width
          // to adopt (and no valid class ID to pass on). Return false and let
          // a normal complaint about bogus operands happen.
          return false;

        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        // Remember the operand instead of warning right away: a later operand
        // may still show that this is not a string instruction at all.
        if (FinalReg != OrigReg)
          RelocatedOps.push_back(i);

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Every operand was accepted; now report the ignored base registers, in
    // operand order.
    for (unsigned Idx : RelocatedOps) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[Idx + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[Idx]);
      std::string RegName =
          IsSIReg(FinalOp.Mem.BaseReg) ? "ES:(R|E)SI" : "ES:(R|E)DI";
      Warning(OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " +
                  RegName + " will be used for the location");
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }

  // Hand the (possibly adjusted) default operands over to the operand list.
  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
    OrigOperands.push_back(std::move(FinalOperands[i]));

  return false;
}
std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
if (isParsingIntelSyntax())
return ParseIntelOperand();
@ -2274,84 +2354,92 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
bool HadVerifyError = false;
// Append default arguments to "ins[bwld]"
if (Name.startswith("ins") && Operands.size() == 1 &&
(Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) {
AddDefaultSrcDestOperands(Operands,
if (Name.startswith("ins") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
Name == "ins")) {
AddDefaultSrcDestOperands(TmpOperands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Append default arguments to "outs[bwld]"
if (Name.startswith("outs") && Operands.size() == 1 &&
if (Name.startswith("outs") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" )) {
AddDefaultSrcDestOperands(Operands,
DefaultMemSIOperand(NameLoc),
Name == "outsd" || Name == "outs")) {
AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
// values of $SIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
if (Name.startswith("lods") && Operands.size() == 1 &&
if (Name.startswith("lods") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
Operands.push_back(DefaultMemSIOperand(NameLoc));
Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
// values of $DIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
if (Name.startswith("stos") && Operands.size() == 1 &&
if (Name.startswith("stos") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "stos" || Name == "stosb" || Name == "stosw" ||
Name == "stosl" || Name == "stosd" || Name == "stosq"))
Operands.push_back(DefaultMemDIOperand(NameLoc));
Name == "stosl" || Name == "stosd" || Name == "stosq")) {
TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
// values of $DIREG according to the mode. It would be nice if this
// could be achieved with InstAlias in the tables.
if (Name.startswith("scas") && Operands.size() == 1 &&
if (Name.startswith("scas") &&
(Operands.size() == 1 || Operands.size() == 2) &&
(Name == "scas" || Name == "scasb" || Name == "scasw" ||
Name == "scasl" || Name == "scasd" || Name == "scasq"))
Operands.push_back(DefaultMemDIOperand(NameLoc));
Name == "scasl" || Name == "scasd" || Name == "scasq")) {
TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Add default SI and DI operands to "cmps[bwlq]".
if (Name.startswith("cmps") &&
(Operands.size() == 1 || Operands.size() == 3) &&
(Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
if (Operands.size() == 1) {
AddDefaultSrcDestOperands(Operands,
DefaultMemDIOperand(NameLoc),
DefaultMemSIOperand(NameLoc));
} else if (Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];
if (!doSrcDstMatch(Op, Op2))
return Error(Op.getStartLoc(),
"mismatching source and destination index registers");
}
AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
DefaultMemSIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Add default SI and DI operands to "movs[bwlq]".
if ((Name.startswith("movs") &&
(Name == "movs" || Name == "movsb" || Name == "movsw" ||
Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
(Name.startswith("smov") &&
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
if (Operands.size() == 1) {
if (Name == "movsd")
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
AddDefaultSrcDestOperands(Operands,
DefaultMemSIOperand(NameLoc),
DefaultMemDIOperand(NameLoc));
} else if (Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];
if (!doSrcDstMatch(Op, Op2))
return Error(Op.getStartLoc(),
"mismatching source and destination index registers");
}
if (((Name.startswith("movs") &&
(Name == "movs" || Name == "movsb" || Name == "movsw" ||
Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
(Name.startswith("smov") &&
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
(Operands.size() == 1 || Operands.size() == 3)) {
if (Name == "movsd" && Operands.size() == 1)
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
DefaultMemDIOperand(NameLoc));
HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
}
// Check if we encountered an error for one of the string instructions
if (HadVerifyError) {
return HadVerifyError;
}
// FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to

View File

@ -2782,6 +2782,11 @@ def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>;
def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
def : InstAlias<"lods\t$src", (LODSB srcidx8:$src), 0>;
def : InstAlias<"lods\t$src", (LODSW srcidx16:$src), 0>;
def : InstAlias<"lods\t$src", (LODSL srcidx32:$src), 0>;
def : InstAlias<"lods\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
// stos aliases. Accept the source being omitted because it's implicit in
// the mnemonic, or the mnemonic suffix being omitted because it's implicit
@ -2794,6 +2799,11 @@ def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>;
def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
def : InstAlias<"stos\t$dst", (STOSB dstidx8:$dst), 0>;
def : InstAlias<"stos\t$dst", (STOSW dstidx16:$dst), 0>;
def : InstAlias<"stos\t$dst", (STOSL dstidx32:$dst), 0>;
def : InstAlias<"stos\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
// scas aliases. Accept the destination being omitted because it's implicit
// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
@ -2806,6 +2816,24 @@ def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>;
def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
def : InstAlias<"scas\t$dst", (SCASB dstidx8:$dst), 0>;
def : InstAlias<"scas\t$dst", (SCASW dstidx16:$dst), 0>;
def : InstAlias<"scas\t$dst", (SCASL dstidx32:$dst), 0>;
def : InstAlias<"scas\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
// cmps aliases. Accept the mnemonic suffix being omitted because it's implicit
// in the destination.
def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSB dstidx8:$dst, srcidx8:$src), 0>;
def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSW dstidx16:$dst, srcidx16:$src), 0>;
def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSL dstidx32:$dst, srcidx32:$src), 0>;
def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>;
// movs aliases. Accept the mnemonic suffix being omitted because it's implicit
// in the destination.
def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSB dstidx8:$dst, srcidx8:$src), 0>;
def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSW dstidx16:$dst, srcidx16:$src), 0>;
def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSL dstidx32:$dst, srcidx32:$src), 0>;
def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>;
// div and idiv aliases for explicit A register.
def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>;
@ -2918,6 +2946,18 @@ def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i3
def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
// ins aliases. Accept the mnemonic suffix being omitted because it's implicit
// in the destination.
def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSB dstidx8:$dst), 0>;
def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSW dstidx16:$dst), 0>;
def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSL dstidx32:$dst), 0>;
// outs aliases. Accept the mnemonic suffix being omitted because it's implicit
// in the source.
def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSB srcidx8:$src), 0>;
def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSW srcidx16:$src), 0>;
def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSL srcidx32:$src), 0>;
// inb %dx -> inb %al, %dx
def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>;

View File

@ -144,3 +144,19 @@ insw %dx, (%edi)
// 64: insw %dx, %es:(%edi) # encoding: [0x66,0x67,0x6d]
// 32: insw %dx, %es:(%edi) # encoding: [0x66,0x6d]
// 16: insw %dx, %es:(%edi) # encoding: [0x67,0x6d]
insw %dx, (%bx)
// ERR64: invalid 16-bit base register
// 32: insw %dx, %es:(%di) # encoding: [0x66,0x67,0x6d]
// 16: insw %dx, %es:(%di) # encoding: [0x6d]
insw %dx, (%ebx)
// 64: insw %dx, %es:(%edi) # encoding: [0x66,0x67,0x6d]
// 32: insw %dx, %es:(%edi) # encoding: [0x66,0x6d]
// 16: insw %dx, %es:(%edi) # encoding: [0x67,0x6d]
insw %dx, (%rbx)
// 64: insw %dx, %es:(%rdi) # encoding: [0x66,0x6d]
// ERR32: 64-bit
// ERR16: 64-bit

View File

@ -751,3 +751,27 @@ loopnz _foo
sidt fword ptr [eax]
// CHECK: sidtq (%eax)
ins byte ptr [eax], dx
// CHECK: insb %dx, %es:(%edi)
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
outs dx, word ptr [eax]
// CHECK: outsw (%esi), %dx
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location
lods dword ptr [eax]
// CHECK: lodsl (%esi), %eax
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location
stos qword ptr [eax]
// CHECK: stosq %rax, %es:(%edi)
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
scas byte ptr [eax]
// CHECK: scasb %es:(%edi), %al
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
cmps word ptr [eax], word ptr [ebx]
// CHECK: cmpsw %es:(%edi), (%esi)
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
movs dword ptr [eax], dword ptr [ebx]
// CHECK: movsl (%esi), %es:(%edi)
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location