diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 06068bad7b97..82a1f14ad871 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -156,6 +156,12 @@ public:
 
   const char *GetString() { return &ResultBuf[0]; }
   unsigned GetStringLength() { return ResultPtr-&ResultBuf[0]; }
+
+  /// getOffsetOfStringByte - This function returns the offset of the
+  /// specified byte of the string data represented by Token.  This handles
+  /// advancing over escape sequences in the string.
+  static unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo,
+                                        Preprocessor &PP);
 };
 
 }  // end namespace clang
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 93b653aae6f2..9815f9b91e7b 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -798,3 +798,56 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
     return;
   }
 }
+
+
+/// getOffsetOfStringByte - This function returns the offset, within the
+/// spelling of the string literal token Tok, of byte number ByteNo of the
+/// string data represented by that token.  The returned offset includes the
+/// leading quote, and an escape sequence is stepped over as one byte of
+/// string data.  Only valid for well-formed, non-wide string literals.
+unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
+                                                    unsigned ByteNo,
+                                                    Preprocessor &PP) {
+  // Get the spelling of the token.
+  llvm::SmallString<16> SpellingBuffer;
+  SpellingBuffer.resize(Tok.getLength());
+
+  const char *SpellingPtr = &SpellingBuffer[0];
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+
+  assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
+
+
+  const char *SpellingStart = SpellingPtr;
+  const char *SpellingEnd = SpellingPtr+TokLen;
+
+  // Skip over the leading quote.
+  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
+  ++SpellingPtr;
+
+  // Skip over bytes until we find the offset we're looking for.
+  while (ByteNo) {
+    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
+
+    // The assert above compiles away in release builds, so stop at the end of
+    // the spelling instead of reading past it when ByteNo is out of range.
+    if (SpellingPtr >= SpellingEnd)
+      break;
+
+    // Step over non-escapes simply.
+    if (*SpellingPtr != '\\') {
+      ++SpellingPtr;
+      --ByteNo;
+      continue;
+    }
+
+    // Otherwise, this is an escape character.  Advance over it.
+    bool HadError = false;
+    ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
+                      Tok.getLocation(), false, PP);
+    assert(!HadError && "This method isn't valid on erroneous strings");
+    --ByteNo;
+  }
+
+  return SpellingPtr-SpellingStart;
+}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 38cc427a0044..b22932bb9a9f 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -17,6 +17,7 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
+#include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/Preprocessor.h"
 using namespace clang;
 
@@ -31,7 +32,7 @@ SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                     unsigned ByteNo) const {
   assert(!SL->isWide() && "This doesn't work for wide strings yet");
 
-  llvm::SmallString<32> SpellingBuffer;
+  llvm::SmallString<16> SpellingBuffer;
 
   // Loop over all of the tokens in this string until we find the one that
   // contains the byte we're looking for.
@@ -78,13 +79,18 @@ SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
     // The length of the string is the token length minus the two quotes.
     TokNumBytes -= 2;
 
-    // FIXME: This should consider character escapes!
+    // FIXME: TokNumBytes is derived from the token length, so it overcounts
+    // the bytes of a token that contains escape sequences.
 
     // If the byte is in this token, return the location of the byte.
     if (ByteNo < TokNumBytes ||
         (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
-      // We advance +1 to step over the '"'.
-      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
+      unsigned Offset =
+        StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP);
+
+      // Now that we know the offset of the token in the spelling, use the
+      // preprocessor to get the offset in the original source.
+      return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
     }
 
     // Move to the next string token.