Add support for macro parameters/arguments delimited by spaces,

to improve compatibility with GNU as.

Based on a patch by PaX Team.

Fixed assertion failures on non-Darwin and added additional test cases.

llvm-svn: 164248
This commit is contained in:
Preston Gurd 2012-09-19 20:36:12 +00:00
parent 12ccbe7a8e
commit 055006475e
6 changed files with 188 additions and 33 deletions

View File

@ -40,6 +40,7 @@ public:
// No-value.
EndOfStatement,
Colon,
Space,
Plus, Minus, Tilde,
Slash, // '/'
BackSlash, // '\'
@ -126,6 +127,7 @@ class MCAsmLexer {
void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
protected: // Can only create subclasses.
const char *TokStart;
bool SkipSpace;
MCAsmLexer();
@ -175,6 +177,9 @@ public:
/// isNot - Check if the current token has kind \p K.
bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
/// setSkipSpace - Set whether spaces should be ignored by the lexer
void setSkipSpace(bool val) { SkipSpace = val; }
};
} // End llvm namespace

View File

@ -396,8 +396,17 @@ AsmToken AsmLexer::LexToken() {
case 0:
case ' ':
case '\t':
// Ignore whitespace.
return LexToken();
if (SkipSpace) {
// Ignore whitespace.
return LexToken();
} else {
int len = 1;
while (*CurPtr==' ' || *CurPtr=='\t') {
CurPtr++;
len++;
}
return AsmToken(AsmToken::Space, StringRef(TokStart, len));
}
case '\n': // FALL THROUGH.
case '\r':
isAtStartOfLine = true;

View File

@ -130,6 +130,9 @@ private:
/// AssemblerDialect. ~0U means unset value and use value provided by MAI.
unsigned AssemblerDialect;
/// IsDarwin - is Darwin compatibility enabled?
bool IsDarwin;
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
@ -209,7 +212,8 @@ private:
virtual void EatToEndOfStatement();
bool ParseMacroArgument(MacroArgument &MA);
bool ParseMacroArgument(MacroArgument &MA,
AsmToken::TokenKind &ArgumentDelimiter);
bool ParseMacroArguments(const Macro *M, MacroArguments &A);
/// \brief Parse up to the end of statement and return the contents from the
@ -407,8 +411,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
AssemblerDialect(~0U) {
CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
AssemblerDialect(~0U), IsDarwin(false) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@ -429,6 +433,7 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
} else if (_MAI.hasSubsectionsViaSymbols()) {
PlatformParser = createDarwinAsmParser();
PlatformParser->Initialize(*this);
IsDarwin = true;
} else {
PlatformParser = createELFAsmParser();
PlatformParser->Initialize(*this);
@ -1471,6 +1476,8 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
if (NParameters != 0 && NParameters != A.size())
return Error(L, "Wrong number of arguments");
// A macro without parameters is handled differently on Darwin:
// gas accepts no arguments and does no substitutions
while (!Body.empty()) {
// Scan for the next substitution.
std::size_t End = Body.size(), Pos = 0;
@ -1537,15 +1544,23 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
if (Parameters[Index].first == Argument)
break;
// FIXME: We should error at the macro definition.
if (Index == NParameters)
return Error(L, "Parameter not found");
if (Index == NParameters) {
if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
Pos += 3;
else {
OS << '\\' << Argument;
Pos = I;
}
} else {
for (MacroArgument::const_iterator it = A[Index].begin(),
ie = A[Index].end(); it != ie; ++it)
if (it->getKind() == AsmToken::String)
OS << it->getStringContents();
else
OS << it->getString();
for (MacroArgument::const_iterator it = A[Index].begin(),
ie = A[Index].end(); it != ie; ++it)
OS << it->getString();
Pos += 1 + Argument.size();
Pos += 1 + Argument.size();
}
}
// Update the scan point.
Body = Body.substr(Pos);
@ -1560,22 +1575,97 @@ MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
{
}
/// IsOperator - Report whether \p kind is one of the operator-like token
/// kinds enumerated below. ParseMacroArgument consults this after a space
/// to decide whether the following token may continue the current argument
/// instead of starting a new one.
///
/// \returns true for the listed kinds, false for every other token kind.
static bool IsOperator(AsmToken::TokenKind kind) {
  switch (kind) {
  // Arithmetic-style tokens.
  case AsmToken::Plus:
  case AsmToken::Minus:
  case AsmToken::Star:
  case AsmToken::Slash:
  case AsmToken::Percent:
  // Bitwise / logical-style tokens.
  case AsmToken::Tilde:
  case AsmToken::Pipe:
  case AsmToken::PipePipe:
  case AsmToken::Caret:
  case AsmToken::Amp:
  case AsmToken::AmpAmp:
  case AsmToken::Exclaim:
  // Comparison and shift-style tokens.
  case AsmToken::EqualEqual:
  case AsmToken::ExclaimEqual:
  case AsmToken::Less:
  case AsmToken::LessEqual:
  case AsmToken::LessLess:
  case AsmToken::LessGreater:
  case AsmToken::Greater:
  case AsmToken::GreaterEqual:
  case AsmToken::GreaterGreater:
  // Remaining kinds the original list also treats as operators.
  case AsmToken::Dot:
  case AsmToken::Equal:
    return true;
  default:
    break;
  }
  // Anything not listed above is not considered an operator.
  return false;
}
/// ParseMacroArgument - Extract AsmTokens for a macro argument.
/// This is used for both default macro parameter values and the
/// arguments in macro invocations
bool AsmParser::ParseMacroArgument(MacroArgument &MA) {
bool AsmParser::ParseMacroArgument(MacroArgument &MA,
AsmToken::TokenKind &ArgumentDelimiter) {
unsigned ParenLevel = 0;
unsigned AddTokens = 0;
// gas accepts arguments separated by whitespace, except on Darwin
if (!IsDarwin)
Lexer.setSkipSpace(false);
for (;;) {
if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) {
Lexer.setSkipSpace(true);
return TokError("unexpected token in macro instantiation");
}
if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
// Spaces and commas cannot be mixed to delimit parameters
if (ArgumentDelimiter == AsmToken::Eof)
ArgumentDelimiter = AsmToken::Comma;
else if (ArgumentDelimiter != AsmToken::Comma) {
Lexer.setSkipSpace(true);
return TokError("expected ' ' for macro argument separator");
}
break;
}
if (Lexer.is(AsmToken::Space)) {
Lex(); // Eat spaces
// Spaces can delimit parameters, but could also be part of an expression.
// If the token after a space is an operator, add the token and the next
// one into this argument
if (ArgumentDelimiter == AsmToken::Space ||
ArgumentDelimiter == AsmToken::Eof) {
if (IsOperator(Lexer.getKind())) {
// Check to see whether the token is used as an operator,
// or part of an identifier
const char *NextChar = getTok().getEndLoc().getPointer() + 1;
if (*NextChar == ' ')
AddTokens = 2;
}
if (!AddTokens && ParenLevel == 0) {
if (ArgumentDelimiter == AsmToken::Eof &&
!IsOperator(Lexer.getKind()))
ArgumentDelimiter = AsmToken::Space;
break;
}
}
}
// HandleMacroEntry relies on not advancing the lexer here
// to be able to fill in the remaining default parameter values
if (Lexer.is(AsmToken::EndOfStatement))
break;
if (ParenLevel == 0 && Lexer.is(AsmToken::Comma))
break;
// Adjust the current parentheses level.
if (Lexer.is(AsmToken::LParen))
@ -1585,8 +1675,12 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) {
// Append the token to the current argument list.
MA.push_back(getTok());
if (AddTokens)
AddTokens--;
Lex();
}
Lexer.setSkipSpace(true);
if (ParenLevel != 0)
return TokError("unbalanced parentheses in macro argument");
return false;
@ -1595,6 +1689,9 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) {
// Parse the macro instantiation arguments.
bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
const unsigned NParameters = M ? M->Parameters.size() : 0;
// Argument delimiter is initially unknown. It will be set by
// ParseMacroArgument()
AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
// Parse two kinds of macro invocations:
// - macros defined without any parameters accept an arbitrary number of them
@ -1603,7 +1700,7 @@ bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
++Parameter) {
MacroArgument MA;
if (ParseMacroArgument(MA))
if (ParseMacroArgument(MA, ArgumentDelimiter))
return true;
if (!MA.empty() || !NParameters)
@ -3105,6 +3202,9 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
return TokError("expected identifier in '.macro' directive");
MacroParameters Parameters;
// Argument delimiter is initially unknown. It will be set by
// ParseMacroArgument()
AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
MacroParameter Parameter;
@ -3113,21 +3213,19 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
if (getLexer().is(AsmToken::Equal)) {
Lex();
if (getParser().ParseMacroArgument(Parameter.second))
if (getParser().ParseMacroArgument(Parameter.second, ArgumentDelimiter))
return true;
}
Parameters.push_back(Parameter);
if (getLexer().isNot(AsmToken::Comma))
if (getLexer().is(AsmToken::Comma))
Lex();
else if (getLexer().is(AsmToken::EndOfStatement))
break;
Lex();
}
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.macro' directive");
// Eat the end of statement.
Lex();

View File

@ -12,7 +12,8 @@
using namespace llvm;
MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) {
MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()),
TokStart(0), SkipSpace(true) {
}
MCAsmLexer::~MCAsmLexer() {

View File

@ -0,0 +1,9 @@
// RUN: not llvm-mc -triple i386-apple-darwin10 %s 2> %t.err | FileCheck %s
.macro test1
.globl "$0 $1 $2 $$3 $n"
.endmacro
// CHECK: .globl "1 23 $3 2"
test1 1, 2 3

View File

@ -1,4 +1,4 @@
// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s
// RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t.err | FileCheck %s
// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
.macro .test0
@ -28,33 +28,66 @@ test2 10
.globl "$0 $1 $2 $$3 $n"
.endmacro
// CHECK: .globl "1 23 $3 2"
test3 1,2 3
// CHECK: .globl "1 (23) $3 2"
test3 1, (2 3)
// CHECK: .globl "1 2 $3 2"
test3 1 2
.macro test4
.globl "$0 -- $1"
.endmacro
// CHECK: .globl "ab)(,) -- (cd)"
test4 a b)(,),(cd)
// CHECK: .globl "(ab)(,)) -- (cd)"
test4 (a b)(,)),(cd)
// CHECK: .globl "(ab)(,)) -- (cd)"
test4 (a b)(,)),(cd)
.macro test5 _a
.globl "\_a"
.endm
test5 zed1
// CHECK: .globl zed1
test5 zed1
.macro test6 $a
.globl "\$a"
.endm
test6 zed2
// CHECK: .globl zed2
test6 zed2
.macro test7 .a
.globl "\.a"
.endm
test7 zed3
// CHECK: .globl zed3
test7 zed3
.macro test8 _a, _b, _c
.globl "\_a,\_b,\_c"
.endmacro
.macro test9 _a _b _c
.globl "\_a \_b \_c"
.endmacro
// CHECK: .globl "a,b,c"
test8 a, b, c
// CHECK: .globl "%1,%2,%3"
test8 %1 %2 %3 #a comment
// CHECK: .globl "x-y,z,1"
test8 x - y z 1
// CHECK: .globl "1 2 3"
test9 1, 2,3
test8 1,2 3
// CHECK-ERRORS: error: macro argument '_c' is missing
// CHECK-ERRORS-NEXT: test8 1,2 3
// CHECK-ERRORS-NEXT: ^
test8 1 2, 3
// CHECK-ERRORS: error: expected ' ' for macro argument separator
// CHECK-ERRORS-NEXT:test8 1 2, 3
// CHECK-ERRORS-NEXT: ^