Lexer: Don't warn about Unicode in preprocessor directives.

This allows people to use Unicode in their #pragma mark and in macros
that exist only to be string-ized.

<rdar://problem/13107323&13121362>

llvm-svn: 174081
This commit is contained in:
Jordan Rose 2013-01-31 19:48:48 +00:00
parent b9baa448b9
commit cc538345be
3 changed files with 19 additions and 2 deletions

View File

@ -2832,7 +2832,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
return LexIdentifier(Result, CurPtr);
}
-if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+!PP->isPreprocessedOutput() &&
!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
// Non-ASCII characters tend to creep into source code unintentionally.
// Instead of letting the parser complain about the unknown token,
@ -3537,7 +3538,8 @@ LexNextToken:
if (Status == conversionOK)
return LexUnicode(Result, CodePoint, CurPtr);
-if (isLexingRawMode() || PP->isPreprocessedOutput()) {
+if (isLexingRawMode() || ParsingPreprocessorDirective ||
+PP->isPreprocessedOutput()) {
++CurPtr;
Kind = tok::unknown;
break;

View File

@ -10,6 +10,17 @@ extern int x; // expected-warning {{treating Unicode character as whitespace}}
// CHECK: extern int {{x}}
// CHECK: extern int {{x}}
#pragma mark ¡Unicode!
#define COPYRIGHT Copyright © 2012
#define XSTR(X) #X
#define STR(X) XSTR(X)
static const char *copyright = STR(COPYRIGHT); // no-warning
// CHECK: static const char *copyright = "Copyright © {{2012}}";
#if PP_ONLY
COPYRIGHT
// CHECK: Copyright © {{2012}}
CHECK: The preprocessor should not complain about Unicode characters like ©.
#endif

View File

@ -9,3 +9,7 @@ extern int
// Don't warn about bad UTF-8 in raw lexing mode.
extern int x;
#endif
// Don't warn about bad UTF-8 in preprocessor directives.
#define x82
#pragma mark