diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 4b5a3135c363..1b064c88ff89 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2832,7 +2832,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { return LexIdentifier(Result, CurPtr); } - if (!isLexingRawMode() && !PP->isPreprocessedOutput() && + if (!isLexingRawMode() && !ParsingPreprocessorDirective && + !PP->isPreprocessedOutput() && !isASCII(*BufferPtr) && !isAllowedIDChar(C)) { // Non-ASCII characters tend to creep into source code unintentionally. // Instead of letting the parser complain about the unknown token, @@ -3537,7 +3538,8 @@ LexNextToken: if (Status == conversionOK) return LexUnicode(Result, CodePoint, CurPtr); - if (isLexingRawMode() || PP->isPreprocessedOutput()) { + if (isLexingRawMode() || ParsingPreprocessorDirective || + PP->isPreprocessedOutput()) { ++CurPtr; Kind = tok::unknown; break; diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c index 26e77f61fdd0..de758f179a41 100644 --- a/clang/test/Lexer/unicode.c +++ b/clang/test/Lexer/unicode.c @@ -10,6 +10,17 @@ extern int x; // expected-warning {{treating Unicode character as whitespace}} // CHECK: extern int {{x}} // CHECK: extern int {{x}} +#pragma mark ¡Unicode! + +#define COPYRIGHT Copyright © 2012 +#define XSTR(X) #X +#define STR(X) XSTR(X) + +static const char *copyright = STR(COPYRIGHT); // no-warning +// CHECK: static const char *copyright = "Copyright © {{2012}}"; + #if PP_ONLY +COPYRIGHT +// CHECK: Copyright © {{2012}} CHECK: The preprocessor should not complain about Unicode characters like ©. #endif diff --git a/clang/test/Lexer/utf8-invalid.c b/clang/test/Lexer/utf8-invalid.c index a387ff776f43..2657b54c374e 100644 --- a/clang/test/Lexer/utf8-invalid.c +++ b/clang/test/Lexer/utf8-invalid.c @@ -9,3 +9,7 @@ extern int // Don't warn about bad UTF-8 in raw lexing mode. extern int ‚x; #endif + +// Don't warn about bad UTF-8 in preprocessor directives. +#define x82 ‚ +#pragma mark ‚