From d67aea28f6cb180291f95690ece485740f3fe859 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 6 Mar 2012 03:21:47 +0000 Subject: [PATCH] User-defined literals: reject string and character UDLs in all places where the grammar requires a string-literal and not a user-defined-string-literal. The two constructs are still represented by the same TokenKind, in order to prevent a combinatorial explosion of different kinds of token. A flag on Token tracks whether a ud-suffix is present, in order to prevent clients from needing to look at the token's spelling. llvm-svn: 152098 --- .../clang/Basic/DiagnosticCommonKinds.td | 2 + .../include/clang/Basic/DiagnosticLexKinds.td | 3 + .../clang/Basic/DiagnosticParseKinds.td | 7 ++- clang/include/clang/Lex/Token.h | 6 +- clang/include/clang/Parse/Parser.h | 2 +- clang/lib/Lex/Lexer.cpp | 1 + clang/lib/Lex/ModuleMap.cpp | 6 ++ clang/lib/Lex/PPDirectives.cpp | 14 ++++- clang/lib/Lex/PPExpressions.cpp | 4 ++ clang/lib/Lex/PPMacroExpansion.cpp | 29 ++++----- clang/lib/Lex/Pragma.cpp | 23 +++++++ clang/lib/Lex/TokenConcatenation.cpp | 39 +++++++++++- clang/lib/Parse/ParseDeclCXX.cpp | 9 ++- clang/lib/Parse/ParseExpr.cpp | 23 ++++--- clang/lib/Parse/ParseExprCXX.cpp | 7 ++- clang/lib/Parse/ParseObjc.cpp | 2 + clang/lib/Parse/ParseStmt.cpp | 6 +- clang/lib/Parse/Parser.cpp | 9 +-- clang/lib/Rewrite/HTMLRewrite.cpp | 1 + .../CXX/over/over.oper/over.literal/p8.cpp | 2 +- clang/test/Lexer/token-concat.cpp | 19 ++++++ clang/test/Parser/asm.cpp | 8 +++ .../Parser/cxx11-user-defined-literals.cpp | 60 +++++++++++++++++++ .../Parser/objcxx11-user-defined-literal.mm | 3 + 24 files changed, 245 insertions(+), 40 deletions(-) create mode 100644 clang/test/Lexer/token-concat.cpp create mode 100644 clang/test/Parser/asm.cpp create mode 100644 clang/test/Parser/cxx11-user-defined-literals.cpp create mode 100644 clang/test/Parser/objcxx11-user-defined-literal.mm diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td 
b/clang/include/clang/Basic/DiagnosticCommonKinds.td index 0a58d1f42278..c649cfcf7c8f 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -41,6 +41,8 @@ def err_expected_colon : Error<"expected ':'">; def err_expected_colon_after_setter_name : Error< "method name referenced in property setter attribute " "must end with ':'">; +def err_invalid_string_udl : Error< + "string literal with user-defined suffix cannot be used here">; // Parse && Sema def ext_no_declarators : ExtWarn<"declaration does not declare anything">, diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 96edbe040ffc..776568f076d7 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -136,6 +136,9 @@ def err_unsupported_string_concat : Error< def err_string_concat_mixed_suffix : Error< "differing user-defined suffixes ('%0' and '%1') in string literal " "concatenation">; +def err_pp_invalid_char_udl : Error< + "character literal with user-defined suffix cannot be used in preprocessor " + "constant expression">; def err_bad_string_encoding : Error< "illegal character encoding in string literal">; def warn_bad_string_encoding : ExtWarn< diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 9d726a662411..69a6b988a961 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -200,7 +200,7 @@ def err_address_of_label_outside_fn : Error< "use of address-of-label extension outside of a function body">; def err_expected_string_literal : Error<"expected string literal">; def err_asm_operand_wide_string_literal : Error< - "cannot use wide string literal in 'asm'">; + "cannot use %select{unicode|wide}0 string literal in 'asm'">; def err_expected_selector_for_method : Error< "expected 
selector for Objective-C method">; def err_expected_property_name : Error<"expected property name">; @@ -425,8 +425,11 @@ def err_parser_impl_limit_overflow : Error< def err_dup_virtual : Error<"duplicate 'virtual' in base specifier">; // C++ operator overloading -def err_operator_string_not_empty : Error< +def err_literal_operator_string_not_empty : Error< "string literal after 'operator' must be '\"\"'">; +def err_literal_operator_missing_space : Error< + "C++11 requires a space between the \"\" and the user-defined suffix in a " + "literal operator">; def warn_cxx98_compat_literal_operator : Warning< "literal operators are incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore; diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index e6dd1607e88b..a88f607298e8 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -75,7 +75,8 @@ public: LeadingSpace = 0x02, // Whitespace exists before this token. DisableExpand = 0x04, // This identifier may never be macro expanded. NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. - LeadingEmptyMacro = 0x10 // Empty macro exists before this token. + LeadingEmptyMacro = 0x10, // Empty macro exists before this token. + HasUDSuffix = 0x20 // This string or character literal has a ud-suffix. }; tok::TokenKind getKind() const { return (tok::TokenKind)Kind; } @@ -263,6 +264,9 @@ public: return (Flags & LeadingEmptyMacro) ? true : false; } + /// \brief Return true if this token is a string or character literal which + /// has a ud-suffix. + bool hasUDSuffix() const { return (Flags & HasUDSuffix) ? 
true : false; } }; /// PPConditionalInfo - Information about the conditional stack (#if directives) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 3fc1fd9fb220..a2a0c6db7659 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1359,7 +1359,7 @@ private: SourceLocation LParenLoc, SourceLocation RParenLoc); - ExprResult ParseStringLiteralExpression(); + ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false); ExprResult ParseGenericSelectionExpression(); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 2b24d1cc75e9..a7fba8019d37 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1582,6 +1582,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); if (isIdentifierHead(C)) { + Result.setFlag(Token::HasUDSuffix); do { CurPtr = ConsumeChar(CurPtr, Size, Result); C = getCharAndSize(CurPtr, Size); diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 28043d119883..e6851afada51 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -617,6 +617,12 @@ retry: break; case tok::string_literal: { + if (LToken.hasUDSuffix()) { + Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl); + HadError = true; + goto retry; + } + // Parse the string literal. 
LangOptions LangOpts; StringLiteralParser StringLiteral(&LToken, 1, SourceMgr, LangOpts, *Target); diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 1e8f59023b9c..7345ef219736 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -822,8 +822,10 @@ void Preprocessor::HandleLineDirective(Token &Tok) { ; // ok else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_line_invalid_filename); - DiscardUntilEndOfDirective(); - return; + return DiscardUntilEndOfDirective(); + } else if (StrTok.hasUDSuffix()) { + Diag(StrTok, diag::err_invalid_string_udl); + return DiscardUntilEndOfDirective(); } else { // Parse and validate the string, converting it into a unique ID. StringLiteralParser Literal(&StrTok, 1, *this); @@ -957,6 +959,9 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); return DiscardUntilEndOfDirective(); + } else if (StrTok.hasUDSuffix()) { + Diag(StrTok, diag::err_invalid_string_udl); + return DiscardUntilEndOfDirective(); } else { // Parse and validate the string, converting it into a unique ID. StringLiteralParser Literal(&StrTok, 1, *this); @@ -1047,6 +1052,11 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { return; } + if (StrTok.hasUDSuffix()) { + Diag(StrTok, diag::err_invalid_string_udl); + return DiscardUntilEndOfDirective(); + } + // Verify that there is nothing after the string, other than EOD. 
CheckEndOfDirective("ident"); diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index c4ab143aaa46..8d8fe316e7eb 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -251,6 +251,10 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, case tok::wide_char_constant: { // L'x' case tok::utf16_char_constant: // u'x' case tok::utf32_char_constant: // U'x' + // Complain about, and drop, any ud-suffix. + if (PeekTok.hasUDSuffix()) + PP.Diag(PeekTok, diag::err_pp_invalid_char_udl); + SmallString<32> CharBuffer; bool CharInvalid = false; StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 99f2b23dff1c..777e0db0265a 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -825,6 +825,16 @@ static bool EvaluateHasIncludeCommon(Token &Tok, return false; } + // Get ')'. + PP.LexNonComment(Tok); + + // Ensure we have a trailing ). + if (Tok.isNot(tok::r_paren)) { + PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName(); + PP.Diag(LParenLoc, diag::note_matching) << "("; + return false; + } + bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename); // If GetIncludeFilenameSpelling set the start ptr to null, there was an // error. @@ -836,20 +846,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, const FileEntry *File = PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL); - // Get the result value. Result = true means the file exists. - bool Result = File != 0; - - // Get ')'. - PP.LexNonComment(Tok); - - // Ensure we have a trailing ). - if (Tok.isNot(tok::r_paren)) { - PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName(); - PP.Diag(LParenLoc, diag::note_matching) << "("; - return false; - } - - return Result; + // Get the result value. 
A result of true means the file exists. + return File != 0; } /// EvaluateHasInclude - Process a '__has_include("path")' expression. @@ -1091,6 +1089,9 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // from macro expansion. SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { + // Complain about, and drop, any ud-suffix. + if (Tok.hasUDSuffix()) + Diag(Tok, diag::err_invalid_string_udl); StrToks.push_back(Tok); LexUnexpandedToken(Tok); } diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index 046a4d02f0ab..404feaab46f7 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -133,6 +133,20 @@ void Preprocessor::Handle_Pragma(Token &Tok) { Lex(Tok); if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) { Diag(PragmaLoc, diag::err__Pragma_malformed); + // Skip this token, and the ')', if present. + if (Tok.isNot(tok::r_paren)) + Lex(Tok); + if (Tok.is(tok::r_paren)) + Lex(Tok); + return; + } + + if (Tok.hasUDSuffix()) { + Diag(Tok, diag::err_invalid_string_udl); + // Skip this token, and the ')', if present. + Lex(Tok); + if (Tok.is(tok::r_paren)) + Lex(Tok); return; } @@ -442,6 +456,8 @@ void Preprocessor::HandlePragmaComment(Token &Tok) { // "foo " "bar" "Baz" SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { + if (Tok.hasUDSuffix()) + Diag(Tok, diag::err_invalid_string_udl); StrToks.push_back(Tok); Lex(Tok); } @@ -518,6 +534,8 @@ void Preprocessor::HandlePragmaMessage(Token &Tok) { // "foo " "bar" "Baz" SmallVector<Token, 4> StrToks; while (Tok.is(tok::string_literal)) { + if (Tok.hasUDSuffix()) + Diag(Tok, diag::err_invalid_string_udl); StrToks.push_back(Tok); Lex(Tok); } @@ -577,6 +595,11 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) { return 0; } + if (Tok.hasUDSuffix()) { + Diag(Tok, diag::err_invalid_string_udl); + return 0; + } + // Remember the macro string. 
std::string StrVal = getSpelling(Tok); diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp index 335d864f3f9c..ca7e55d863c0 100644 --- a/clang/lib/Lex/TokenConcatenation.cpp +++ b/clang/lib/Lex/TokenConcatenation.cpp @@ -85,6 +85,19 @@ TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) { TokenInfo[tok::hash ] |= aci_custom_firstchar; TokenInfo[tok::arrow ] |= aci_custom_firstchar; + // These tokens have custom code in C++11 mode. + if (PP.getLangOptions().CPlusPlus0x) { + TokenInfo[tok::string_literal ] |= aci_custom; + TokenInfo[tok::wide_string_literal ] |= aci_custom; + TokenInfo[tok::utf8_string_literal ] |= aci_custom; + TokenInfo[tok::utf16_string_literal] |= aci_custom; + TokenInfo[tok::utf32_string_literal] |= aci_custom; + TokenInfo[tok::char_constant ] |= aci_custom; + TokenInfo[tok::wide_char_constant ] |= aci_custom; + TokenInfo[tok::utf16_char_constant ] |= aci_custom; + TokenInfo[tok::utf32_char_constant ] |= aci_custom; + } + // These tokens change behavior if followed by an '='. TokenInfo[tok::amp ] |= aci_avoid_equal; // &= TokenInfo[tok::plus ] |= aci_avoid_equal; // += @@ -183,6 +196,28 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, case tok::raw_identifier: llvm_unreachable("tok::raw_identifier in non-raw lexing mode!"); + case tok::string_literal: + case tok::wide_string_literal: + case tok::utf8_string_literal: + case tok::utf16_string_literal: + case tok::utf32_string_literal: + case tok::char_constant: + case tok::wide_char_constant: + case tok::utf16_char_constant: + case tok::utf32_char_constant: + if (!PP.getLangOptions().CPlusPlus0x) + return false; + + // In C++11, a string or character literal followed by an identifier is a + // single token. + if (Tok.getIdentifierInfo()) + return true; + + // A ud-suffix is an identifier. If the previous token ends with one, treat + // it as an identifier. + if (!PrevTok.hasUDSuffix()) + return false; + // FALL THROUGH. 
case tok::identifier: // id+id or id+number or id+L"foo". // id+'.'... will not append. if (Tok.is(tok::numeric_constant)) @@ -201,9 +236,11 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, // Otherwise, this is a narrow character or string. If the *identifier* // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo". return IsIdentifierStringPrefix(PrevTok); + case tok::numeric_constant: return isalnum(FirstChar) || Tok.is(tok::numeric_constant) || - FirstChar == '+' || FirstChar == '-' || FirstChar == '.'; + FirstChar == '+' || FirstChar == '-' || FirstChar == '.' || + (PP.getLangOptions().CPlusPlus0x && FirstChar == '_'); case tok::period: // ..., .*, .1234 return (FirstChar == '.' && PrevPrevTok.is(tok::period)) || isdigit(FirstChar) || diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 978b2b362d1d..89f024637e59 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -272,6 +272,11 @@ Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, unsigned Context) { if (Invalid) return 0; + // FIXME: This is incorrect: linkage-specifiers are parsed in translation + // phase 7, so string-literal concatenation is supposed to occur. + // extern "" "C" "" "+" "+" { } is legal. 
+ if (Tok.hasUDSuffix()) + Diag(Tok, diag::err_invalid_string_udl); SourceLocation Loc = ConsumeStringToken(); ParseScope LinkageScope(this, Scope::DeclScope); @@ -617,8 +622,10 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd){ } ExprResult AssertMessage(ParseStringLiteralExpression()); - if (AssertMessage.isInvalid()) + if (AssertMessage.isInvalid()) { + SkipUntil(tok::semi); return 0; + } T.consumeClose(); diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index fe4ae38e7620..21a2e573c7ee 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -497,14 +497,14 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback { /// unary-operator cast-expression /// 'sizeof' unary-expression /// 'sizeof' '(' type-name ')' -/// [C++0x] 'sizeof' '...' '(' identifier ')' +/// [C++11] 'sizeof' '...' '(' identifier ')' /// [GNU] '__alignof' unary-expression /// [GNU] '__alignof' '(' type-name ')' -/// [C++0x] 'alignof' '(' type-id ')' +/// [C++11] 'alignof' '(' type-id ')' /// [GNU] '&&' identifier +/// [C++11] 'noexcept' '(' expression ')' [C++11 5.3.7] /// [C++] new-expression /// [C++] delete-expression -/// [C++0x] 'noexcept' '(' expression ')' /// /// unary-operator: one of /// '&' '*' '+' '-' '~' '!' 
@@ -516,7 +516,8 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback { /// constant /// string-literal /// [C++] boolean-literal [C++ 2.13.5] -/// [C++0x] 'nullptr' [C++0x 2.14.7] +/// [C++11] 'nullptr' [C++11 2.14.7] +/// [C++11] user-defined-literal /// '(' expression ')' /// [C11] generic-selection /// '__func__' [C99 6.4.2.2] @@ -535,9 +536,9 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback { /// [OBJC] '@encode' '(' type-name ')' /// [OBJC] objc-string-literal /// [C++] simple-type-specifier '(' expression-list[opt] ')' [C++ 5.2.3] -/// [C++0x] simple-type-specifier braced-init-list [C++ 5.2.3] +/// [C++11] simple-type-specifier braced-init-list [C++11 5.2.3] /// [C++] typename-specifier '(' expression-list[opt] ')' [C++ 5.2.3] -/// [C++0x] typename-specifier braced-init-list [C++ 5.2.3] +/// [C++11] typename-specifier braced-init-list [C++11 5.2.3] /// [C++] 'const_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] /// [C++] 'dynamic_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] /// [C++] 'reinterpret_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] @@ -850,7 +851,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::utf8_string_literal: case tok::utf16_string_literal: case tok::utf32_string_literal: - Res = ParseStringLiteralExpression(); + Res = ParseStringLiteralExpression(true); break; case tok::kw__Generic: // primary-expression: generic-selection [C11 6.5.1] Res = ParseGenericSelectionExpression(); @@ -2102,7 +2103,7 @@ Parser::ParseCompoundLiteralExpression(ParsedType Ty, /// /// primary-expression: [C99 6.5.1] /// string-literal -ExprResult Parser::ParseStringLiteralExpression() { +ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) { assert(isTokenStringLiteral() && "Not a string literal!"); // String concat. 
Note that keywords like __func__ and __FUNCTION__ are not @@ -2110,6 +2111,12 @@ ExprResult Parser::ParseStringLiteralExpression() { SmallVector<Token, 4> StringToks; do { + if (!AllowUserDefinedLiteral && Tok.hasUDSuffix()) { + Diag(Tok, diag::err_invalid_string_udl); + do ConsumeStringToken(); while (isTokenStringLiteral()); + return ExprError(); + } + StringToks.push_back(Tok); ConsumeStringToken(); } while (isTokenStringLiteral()); diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index d8663077fdf7..c735c6f2b301 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1905,8 +1905,13 @@ bool Parser::ParseUnqualifiedIdOperator(CXXScopeSpec &SS, bool EnteringContext, if (getLang().CPlusPlus0x && Tok.is(tok::string_literal)) { Diag(Tok.getLocation(), diag::warn_cxx98_compat_literal_operator); + // FIXME: Add a FixIt to insert a space before the suffix, and recover. + if (Tok.hasUDSuffix()) { + Diag(Tok.getLocation(), diag::err_literal_operator_missing_space); + return true; + } if (Tok.getLength() != 2) - Diag(Tok.getLocation(), diag::err_operator_string_not_empty); + Diag(Tok.getLocation(), diag::err_literal_operator_string_not_empty); ConsumeStringToken(); if (Tok.isNot(tok::identifier)) { diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp index eea9a7e981ee..5ed8ffec4c5c 100644 --- a/clang/lib/Parse/ParseObjc.cpp +++ b/clang/lib/Parse/ParseObjc.cpp @@ -2011,6 +2011,8 @@ ExprResult Parser::ParseObjCAtExpression(SourceLocation AtLoc) { case tok::string_literal: // primary-expression: string-literal case tok::wide_string_literal: + if (Tok.hasUDSuffix()) + return ExprError(Diag(Tok, diag::err_invalid_string_udl)); return ParsePostfixExpressionSuffix(ParseObjCStringLiteral(AtLoc)); default: if (Tok.getIdentifierInfo() == 0) diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index 0ce73cbeb2b9..cd80c4c96929 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ 
b/clang/lib/Parse/ParseStmt.cpp @@ -1797,10 +1797,8 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) { ExprResult AsmString(ParseAsmStringLiteral()); if (AsmString.isInvalid()) { - // If the reason we are recovering is because of an improper string - // literal, it makes the most sense just to consume to the ')'. - if (isTokenStringLiteral()) - T.skipToEnd(); + // Consume up to and including the closing paren. + T.skipToEnd(); return StmtError(); } diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 6a479bc60cb3..8b1765df3930 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -1127,9 +1127,13 @@ Parser::ExprResult Parser::ParseAsmStringLiteral() { switch (Tok.getKind()) { case tok::string_literal: break; + case tok::utf8_string_literal: + case tok::utf16_string_literal: + case tok::utf32_string_literal: case tok::wide_string_literal: { SourceLocation L = Tok.getLocation(); Diag(Tok, diag::err_asm_operand_wide_string_literal) + << (Tok.getKind() == tok::wide_string_literal) << SourceRange(L, L); return ExprError(); } @@ -1138,10 +1142,7 @@ Parser::ExprResult Parser::ParseAsmStringLiteral() { return ExprError(); } - ExprResult Res(ParseStringLiteralExpression()); - if (Res.isInvalid()) return move(Res); - - return move(Res); + return ParseStringLiteralExpression(); } /// ParseSimpleAsm diff --git a/clang/lib/Rewrite/HTMLRewrite.cpp b/clang/lib/Rewrite/HTMLRewrite.cpp index 63b7def52ac8..6a1e3dcdc061 100644 --- a/clang/lib/Rewrite/HTMLRewrite.cpp +++ b/clang/lib/Rewrite/HTMLRewrite.cpp @@ -409,6 +409,7 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { --TokLen; // FALL THROUGH. case tok::string_literal: + // FIXME: Exclude the optional ud-suffix from the highlighted range. 
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, "<span class='string_literal'>", "</span>"); break; diff --git a/clang/test/CXX/over/over.oper/over.literal/p8.cpp b/clang/test/CXX/over/over.oper/over.literal/p8.cpp index 69d4e761e559..1837aafc7bc9 100644 --- a/clang/test/CXX/over/over.oper/over.literal/p8.cpp +++ b/clang/test/CXX/over/over.oper/over.literal/p8.cpp @@ -10,7 +10,7 @@ string operator "" _i18n(const char*, std::size_t); // ok // FIXME: This should be accepted once we support UCNs template<char...> int operator "" \u03C0(); // ok, UCN for lowercase pi // expected-error {{expected identifier}} // FIXME: Accept this as an extension, with a fix-it to add the space -float operator ""E(const char *); // expected-error {{must be '""'}} expected-error {{expected identifier}} +float operator ""E(const char *); // expected-error {{C++11 requires a space between the "" and the user-defined suffix in a literal operator}} float operator " " B(const char *); // expected-error {{must be '""'}} expected-warning {{hexfloat}} string operator "" 5X(const char *, std::size_t); // expected-error {{expected identifier}} double operator "" _miles(double); // expected-error {{parameter}} diff --git a/clang/test/Lexer/token-concat.cpp b/clang/test/Lexer/token-concat.cpp new file mode 100644 index 000000000000..57dbae03c1e1 --- /dev/null +++ b/clang/test/Lexer/token-concat.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -E -std=c++11 -o - %s | FileCheck %s + +#define id(x) x +id("s")_x // CHECK: "s" _x +id(L"s")_x // CHECK: L"s" _x +id(u8"s")_x // CHECK: u8"s" _x +id(u"s")_x // CHECK: u"s" _x +id(U"s")_x // CHECK: U"s" _x +id('s')_x // CHECK: 's' _x +id(L's')_x // CHECK: L's' _x +id(u's')_x // CHECK: u's' _x +id(U's')_x // CHECK: U's' _x +id("s"_x)_y // CHECK: "s"_x _y +id(1.0_)f // CHECK: 1.0_ f +id(1.0)_f // CHECK: 1.0 _f +id(0xface+)b_count // CHECK: 0xface+ b_count +id("s")1 // CHECK: "s"1 +id("s"_x)1 // CHECK: "s"_x 1 +id(1)_2_x // CHECK: 1 _2_x diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp new 
file mode 100644 index 000000000000..35a497c83a1e --- /dev/null +++ b/clang/test/Parser/asm.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s + +int foo1 asm ("bar1"); +int foo2 asm (L"bar2"); // expected-error {{cannot use wide string literal in 'asm'}} +int foo3 asm (u8"bar3"); // expected-error {{cannot use unicode string literal in 'asm'}} +int foo4 asm (u"bar4"); // expected-error {{cannot use unicode string literal in 'asm'}} +int foo5 asm (U"bar5"); // expected-error {{cannot use unicode string literal in 'asm'}} +int foo6 asm ("bar6"_x); // expected-error {{string literal with user-defined suffix cannot be used here}} diff --git a/clang/test/Parser/cxx11-user-defined-literals.cpp b/clang/test/Parser/cxx11-user-defined-literals.cpp new file mode 100644 index 000000000000..c2d5af5c1dea --- /dev/null +++ b/clang/test/Parser/cxx11-user-defined-literals.cpp @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -std=c++11 -verify %s -fms-extensions -triple x86_64-apple-darwin9.0.0 + +// A ud-suffix cannot be used on string literals in a whole bunch of contexts: + +#include "foo"_bar // expected-error {{expected "FILENAME" or <FILENAME>}} +#line 1 "foo"_bar // expected-error {{user-defined suffix cannot be used here}} +# 1 "foo"_bar 1 // expected-error {{user-defined suffix cannot be used here}} +#ident "foo"_bar // expected-error {{user-defined suffix cannot be used here}} +_Pragma("foo"_bar) // expected-error {{user-defined suffix cannot be used here}} +#pragma comment(lib, "foo"_bar) // expected-error {{user-defined suffix cannot be used here}} +_Pragma("comment(lib, \"foo\"_bar)") // expected-error {{user-defined suffix cannot be used here}} +#pragma message "hi"_there // expected-error {{user-defined suffix cannot be used here}} expected-warning {{hi}} +#pragma push_macro("foo"_bar) // expected-error {{user-defined suffix cannot be used here}} +#if __has_warning("-Wan-island-to-discover"_bar) // expected-error {{user-defined suffix cannot be used here}} +#elif 
__has_include("foo"_bar) // expected-error {{expected "FILENAME" or <FILENAME>}} +#endif + +extern "C++"_x {} // expected-error {{user-defined suffix cannot be used here}} expected-error {{unknown linkage language}} + +int f() { + asm("mov %eax, %rdx"_foo); // expected-error {{user-defined suffix cannot be used here}} +} + +static_assert(true, "foo"_bar); // expected-error {{user-defined suffix cannot be used here}} + +int cake() __attribute__((availability(macosx, unavailable, message = "is a lie"_x))); // expected-error {{user-defined suffix cannot be used here}} + +// A ud-suffix cannot be used on character literals in preprocessor constant +// expressions: +#if 'x'_y - u'x'_z // expected-error 2{{character literal with user-defined suffix cannot be used in preprocessor constant expression}} +#error error +#endif + +// But they can appear in expressions. +constexpr char operator"" _id(char c) { return c; } +constexpr wchar_t operator"" _id(wchar_t c) { return c; } +constexpr char16_t operator"" _id(char16_t c) { return c; } +constexpr char32_t operator"" _id(char32_t c) { return c; } + +using size_t = decltype(sizeof(int)); +constexpr const char operator"" _id(const char *p, size_t n) { return *p; } +constexpr const wchar_t operator"" _id(const wchar_t *p, size_t n) { return *p; } +constexpr const char16_t operator"" _id(const char16_t *p, size_t n) { return *p; } +constexpr const char32_t operator"" _id(const char32_t *p, size_t n) { return *p; } + +template struct S {}; +S<"a"_id[0]> sa; +S sb; +S sc; +S sd; +S se; + +S<'w'_id> sw; +S sx; +S sy; +S sz; + +void h() { + (void)"test"_id "test" L"test"; +} diff --git a/clang/test/Parser/objcxx11-user-defined-literal.mm b/clang/test/Parser/objcxx11-user-defined-literal.mm new file mode 100644 index 000000000000..a5f1397530fd --- /dev/null +++ b/clang/test/Parser/objcxx11-user-defined-literal.mm @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s + +id x = @"foo"_bar; // expected-error{{user-defined suffix 
cannot be used here}}