//===-- Core/IncludeDirectives.cpp - Include directives handling ----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// \brief This file defines the IncludeDirectives class that helps with /// detecting and modifying \#include directives. /// //===----------------------------------------------------------------------===// #include "IncludeDirectives.h" #include "clang/Basic/CharInfo.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include using namespace clang; using namespace clang::tooling; using llvm::StringRef; /// \brief PPCallbacks that fills-in the include information in the given /// \c IncludeDirectives. class IncludeDirectivesPPCallback : public clang::PPCallbacks { // Struct helping the detection of header guards in the various callbacks struct GuardDetection { GuardDetection(FileID FID) : FID(FID), Count(0), TheMacro(0), CountAtEndif(0) {} FileID FID; // count for relevant preprocessor directives unsigned Count; // the macro that is tested in the top most ifndef for the header guard // (e.g: GUARD_H) const IdentifierInfo *TheMacro; // the hash locations of #ifndef, #define, #endif SourceLocation IfndefLoc, DefineLoc, EndifLoc; // the value of Count once the #endif is reached unsigned CountAtEndif; /// \brief Check that with all the information gathered if this is a /// potential header guard. /// /// Meaning a top-most \#ifndef has been found, followed by a define and the /// last preprocessor directive was the terminating \#endif. /// /// FIXME: accept the \#if !defined identifier form too. bool isPotentialHeaderGuard() const { return Count == CountAtEndif && DefineLoc.isValid(); } }; public: IncludeDirectivesPPCallback(IncludeDirectives *Self) : Self(Self), Guard(0) {} private: virtual ~IncludeDirectivesPPCallback() {} void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File, StringRef SearchPath, StringRef RelativePath, const Module *Imported) LLVM_OVERRIDE { SourceManager &SM = Self->Sources; const FileEntry *FE = SM.getFileEntryForID(SM.getFileID(HashLoc)); assert(FE && "Valid file expected."); IncludeDirectives::Entry E(HashLoc, File, IsAngled); Self->FileToEntries[FE].push_back(E); Self->IncludeAsWrittenToLocationsMap[FileName].push_back(HashLoc); } // Keep track of the current file in the stack virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) { SourceManager &SM = Self->Sources; switch (Reason) { case EnterFile: Files.push(GuardDetection(SM.getFileID(Loc))); Guard = &Files.top(); break; case ExitFile: if (Guard->isPotentialHeaderGuard()) handlePotentialHeaderGuard(*Guard); Files.pop(); Guard = &Files.top(); break; default: break; } } /// \brief Mark this header as guarded in the IncludeDirectives if it's a /// proper header guard. void handlePotentialHeaderGuard(const GuardDetection &Guard) { SourceManager &SM = Self->Sources; const FileEntry *File = SM.getFileEntryForID(Guard.FID); const LangOptions &LangOpts = Self->CI.getLangOpts(); // Null file can happen for the buffer for example. They // shouldn't have header guards though... if (!File) return; // The #ifndef should be the next thing after the preamble. We aren't // checking for equality because it can also be part of the preamble if the // preamble is the whole file. unsigned Preamble = Lexer::ComputePreamble(SM.getBuffer(Guard.FID), LangOpts).first; unsigned IfndefOffset = SM.getFileOffset(Guard.IfndefLoc); if (IfndefOffset > (Preamble + 1)) return; // No code is allowed in the code remaining after the #endif. const llvm::MemoryBuffer *Buffer = SM.getBuffer(Guard.FID); Lexer Lex(SM.getLocForStartOfFile(Guard.FID), LangOpts, Buffer->getBufferStart(), Buffer->getBufferStart() + SM.getFileOffset(Guard.EndifLoc), Buffer->getBufferEnd()); // Find the first newline not part of a multi-line comment. Token Tok; Lex.LexFromRawLexer(Tok); // skip endif Lex.LexFromRawLexer(Tok); // Not a proper header guard, the remainder of the file contains something // else than comments or whitespaces. if (Tok.isNot(tok::eof)) return; // Add to the location of the define to the IncludeDirectives for this file. Self->HeaderToGuard[File] = Guard.DefineLoc; } virtual void Ifndef(SourceLocation Loc, const Token &MacroNameTok, const MacroDirective *MD) { Guard->Count++; // If this #ifndef is the top-most directive and the symbol isn't defined // store those information in the guard detection, the next step will be to // check for the define. if (Guard->Count == 1 && MD == 0) { IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); if (MII->hasMacroDefinition()) return; Guard->IfndefLoc = Loc; Guard->TheMacro = MII; } } virtual void MacroDefined(const Token &MacroNameTok, const MacroDirective *MD) { Guard->Count++; // If this #define is the second directive of the file and the symbol // defined is the same as the one checked in the #ifndef then store the // information about this define. if (Guard->Count == 2 && Guard->TheMacro != 0) { IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); // macro unrelated to the ifndef, doesn't look like a proper header guard if (MII->getName() != Guard->TheMacro->getName()) return; Guard->DefineLoc = MacroNameTok.getLocation(); } } virtual void Endif(SourceLocation Loc, SourceLocation IfLoc) { Guard->Count++; // If it's the #endif corresponding to the top-most #ifndef if (Self->Sources.getDecomposedLoc(Guard->IfndefLoc) != Self->Sources.getDecomposedLoc(IfLoc)) return; // And that the top-most #ifndef was followed by the right #define if (Guard->DefineLoc.isInvalid()) return; // Then save the information about this #endif. Once the file is exited we // will check if it was the final preprocessor directive. Guard->CountAtEndif = Guard->Count; Guard->EndifLoc = Loc; } virtual void MacroExpands(const Token &, const MacroDirective *, SourceRange, const MacroArgs *) { Guard->Count++; } virtual void MacroUndefined(const Token &, const MacroDirective *) { Guard->Count++; } virtual void Defined(const Token &, const MacroDirective *, SourceRange) { Guard->Count++; } virtual void If(SourceLocation, SourceRange, bool) { Guard->Count++; } virtual void Elif(SourceLocation, SourceRange, bool, SourceLocation) { Guard->Count++; } virtual void Ifdef(SourceLocation, const Token &, const MacroDirective *) { Guard->Count++; } virtual void Else(SourceLocation, SourceLocation) { Guard->Count++; } IncludeDirectives *Self; // keep track of the guard info through the include stack std::stack Files; // convenience field pointing to Files.top().second GuardDetection *Guard; }; // Flags that describes where to insert newlines. enum NewLineFlags { // Prepend a newline at the beginning of the insertion. NL_Prepend = 0x1, // Prepend another newline at the end of the insertion. NL_PrependAnother = 0x2, // Add two newlines at the end of the insertion. NL_AppendTwice = 0x4, // Convenience value to enable both \c NL_Prepend and \c NL_PrependAnother. NL_PrependTwice = NL_Prepend | NL_PrependAnother }; /// \brief Guess the end-of-line sequence used in the given FileID. If the /// sequence can't be guessed return an Unix-style newline. static StringRef guessEOL(SourceManager &SM, FileID ID) { StringRef Content = SM.getBufferData(ID); StringRef Buffer = Content.substr(Content.find_first_of("\r\n")); return llvm::StringSwitch(Buffer) .StartsWith("\r\n", "\r\n") .StartsWith("\n\r", "\n\r") .StartsWith("\r", "\r") .Default("\n"); } /// \brief Find the end of the end of the directive, either the beginning of a /// newline or the end of file. // // \return The offset into the file where the directive ends along with a // boolean value indicating whether the directive ends because the end of file // was reached or not. static std::pair findDirectiveEnd(SourceLocation HashLoc, SourceManager &SM, const LangOptions &LangOpts) { FileID FID = SM.getFileID(HashLoc); unsigned Offset = SM.getFileOffset(HashLoc); StringRef Content = SM.getBufferData(FID); Lexer Lex(SM.getLocForStartOfFile(FID), LangOpts, Content.begin(), Content.begin() + Offset, Content.end()); Lex.SetCommentRetentionState(true); Token Tok; // This loop look for the newline after our directive but avoids the ones part // of a multi-line comments: // // #include /* long \n comment */\n // ~~ no ~~ yes for (;;) { // find the beginning of the end-of-line sequence StringRef::size_type EOLOffset = Content.find_first_of("\r\n", Offset); // ends because EOF was reached if (EOLOffset == StringRef::npos) return std::make_pair(Content.size(), true); // find the token that contains our end-of-line unsigned TokEnd = 0; do { Lex.LexFromRawLexer(Tok); TokEnd = SM.getFileOffset(Tok.getLocation()) + Tok.getLength(); // happens when the whitespaces are eaten after a multiline comment if (Tok.is(tok::eof)) return std::make_pair(EOLOffset, false); } while (TokEnd < EOLOffset); // the end-of-line is not part of a multi-line comment, return its location if (Tok.isNot(tok::comment)) return std::make_pair(EOLOffset, false); // for the next search to start after the end of this token Offset = TokEnd; } } IncludeDirectives::IncludeDirectives(clang::CompilerInstance &CI) : CI(CI), Sources(CI.getSourceManager()) { // addPPCallbacks takes ownership of the callback CI.getPreprocessor().addPPCallbacks(new IncludeDirectivesPPCallback(this)); } bool IncludeDirectives::lookForInclude(const FileEntry *File, const LocationVec &IncludeLocs, SeenFilesSet &Seen) const { // mark this file as visited Seen.insert(File); // First check if included directly in this file for (LocationVec::const_iterator I = IncludeLocs.begin(), E = IncludeLocs.end(); I != E; ++I) if (Sources.getFileEntryForID(Sources.getFileID(*I)) == File) return true; // Otherwise look recursively all the included files FileToEntriesMap::const_iterator EntriesIt = FileToEntries.find(File); if (EntriesIt == FileToEntries.end()) return false; for (EntryVec::const_iterator I = EntriesIt->second.begin(), E = EntriesIt->second.end(); I != E; ++I) { // skip if this header has already been checked before if (Seen.count(I->getIncludedFile())) continue; if (lookForInclude(I->getIncludedFile(), IncludeLocs, Seen)) return true; } return false; } bool IncludeDirectives::hasInclude(const FileEntry *File, StringRef Include) const { llvm::StringMap::const_iterator It = IncludeAsWrittenToLocationsMap.find(Include); // Include isn't included in any file if (It == IncludeAsWrittenToLocationsMap.end()) return false; SeenFilesSet Seen; return lookForInclude(File, It->getValue(), Seen); } Replacement IncludeDirectives::addAngledInclude(const clang::FileEntry *File, llvm::StringRef Include) { FileID FID = Sources.translateFile(File); assert(!FID.isInvalid() && "Invalid file entry given!"); if (hasInclude(File, Include)) return Replacement(); unsigned Offset, NLFlags; llvm::tie(Offset, NLFlags) = angledIncludeInsertionOffset(FID); StringRef EOL = guessEOL(Sources, FID); llvm::SmallString<32> InsertionText; if (NLFlags & NL_Prepend) InsertionText += EOL; if (NLFlags & NL_PrependAnother) InsertionText += EOL; InsertionText += "#include <"; InsertionText += Include; InsertionText += ">"; if (NLFlags & NL_AppendTwice) { InsertionText += EOL; InsertionText += EOL; } return Replacement(File->getName(), Offset, 0, InsertionText); } Replacement IncludeDirectives::addAngledInclude(llvm::StringRef File, llvm::StringRef Include) { const FileEntry *Entry = Sources.getFileManager().getFile(File); assert(Entry && "Invalid file given!"); return addAngledInclude(Entry, Include); } std::pair IncludeDirectives::findFileHeaderEndOffset(FileID FID) const { unsigned NLFlags = NL_Prepend; StringRef Content = Sources.getBufferData(FID); Lexer Lex(Sources.getLocForStartOfFile(FID), CI.getLangOpts(), Content.begin(), Content.begin(), Content.end()); Lex.SetCommentRetentionState(true); Lex.SetKeepWhitespaceMode(true); // find the first newline not part of a multi-line comment Token Tok; do { Lex.LexFromRawLexer(Tok); unsigned Offset = Sources.getFileOffset(Tok.getLocation()); // allow one newline between the comments if (Tok.is(tok::unknown) && isWhitespace(Content[Offset])) { StringRef Whitespaces(Content.substr(Offset, Tok.getLength())); if (Whitespaces.count('\n') == 1 || Whitespaces.count('\r') == 1) Lex.LexFromRawLexer(Tok); else { // add an empty line to separate the file header and the inclusion NLFlags = NL_PrependTwice; } } } while (Tok.is(tok::comment)); // apparently there is no header, insertion point is the beginning of the file if (Tok.isNot(tok::unknown)) return std::make_pair(0, NL_AppendTwice); return std::make_pair(Sources.getFileOffset(Tok.getLocation()), NLFlags); } SourceLocation IncludeDirectives::angledIncludeHintLoc(FileID FID) const { FileToEntriesMap::const_iterator EntriesIt = FileToEntries.find(Sources.getFileEntryForID(FID)); if (EntriesIt == FileToEntries.end()) return SourceLocation(); HeaderSearch &HeaderInfo = CI.getPreprocessor().getHeaderSearchInfo(); const EntryVec &Entries = EntriesIt->second; EntryVec::const_reverse_iterator QuotedCandidate = Entries.rend(); for (EntryVec::const_reverse_iterator I = Entries.rbegin(), E = Entries.rend(); I != E; ++I) { // Headers meant for multiple inclusion can potentially appears in the // middle of the code thus making them a poor choice for an insertion point. if (!HeaderInfo.isFileMultipleIncludeGuarded(I->getIncludedFile())) continue; // return preferably the last angled include if (I->isAngled()) return I->getHashLocation(); // keep track of the last quoted include that is guarded if (QuotedCandidate == Entries.rend()) QuotedCandidate = I; } if (QuotedCandidate == Entries.rend()) return SourceLocation(); // return the last quoted-include if we couldn't find an angled one return QuotedCandidate->getHashLocation(); } std::pair IncludeDirectives::angledIncludeInsertionOffset(FileID FID) const { SourceLocation Hint = angledIncludeHintLoc(FID); unsigned NL_Flags = NL_Prepend; // If we can't find a similar include and we are in a header check if it's a // guarded header. If so the hint will be the location of the #define from the // guard. if (Hint.isInvalid()) { const FileEntry *File = Sources.getFileEntryForID(FID); HeaderToGuardMap::const_iterator GuardIt = HeaderToGuard.find(File); if (GuardIt != HeaderToGuard.end()) { // get the hash location from the #define Hint = GuardIt->second; // we want a blank line between the #define and the #include NL_Flags = NL_PrependTwice; } } // no hints, insertion is done after the file header if (Hint.isInvalid()) return findFileHeaderEndOffset(FID); unsigned Offset = findDirectiveEnd(Hint, Sources, CI.getLangOpts()).first; return std::make_pair(Offset, NL_Flags); }