[C++20] Implement context-sensitive header-name lexing and pp-import parsing in the preprocessor.

llvm-svn: 358231
This commit is contained in:
Richard Smith 2019-04-11 21:18:23 +00:00
parent 75f9681874
commit 8af8b8611c
21 changed files with 653 additions and 117 deletions

View File

@ -418,7 +418,8 @@ def warn_pp_hdrstop_filename_ignored : Warning<
"/Fp can be used to specify precompiled header filename">,
InGroup<ClangClPch>;
def err_pp_file_not_found_angled_include_not_fatal : Error<
"'%0' file not found with <angled> include; use \"quotes\" instead">;
"'%0' file not found with <angled> %select{include|import}1; "
"use \"quotes\" instead">;
def err_pp_file_not_found_typo_not_fatal
: Error<"'%0' file not found, did you mean '%1'?">;
def note_pp_framework_without_header : Note<
@ -642,7 +643,8 @@ def err_pp_double_begin_of_arc_cf_code_audited : Error<
def err_pp_unmatched_end_of_arc_cf_code_audited : Error<
"not currently inside '#pragma clang arc_cf_code_audited'">;
def err_pp_include_in_arc_cf_code_audited : Error<
"cannot #include files inside '#pragma clang arc_cf_code_audited'">;
"cannot %select{#include files|import headers}0 "
"inside '#pragma clang arc_cf_code_audited'">;
def err_pp_eof_in_arc_cf_code_audited : Error<
"'#pragma clang arc_cf_code_audited' was not ended within this file">;
@ -776,6 +778,14 @@ def warn_module_conflict : Warning<
"module '%0' conflicts with already-imported module '%1': %2">,
InGroup<ModuleConflict>;
// C++20 modules
// Reported when the ';' that terminates a header import has a source location
// inside a macro expansion.
def err_header_import_semi_in_macro : Error<
"semicolon terminating header import declaration cannot be produced "
"by a macro">;
// Reported when an imported header was found on disk but no module was
// suggested for it. %0 is the header name as written in the import and %1 is
// the name of the file it resolved to.
def err_header_import_not_header_unit : Error<
"header file %0 (aka '%1') cannot be imported because "
"it is not known to be a header unit">;
def warn_header_guard : Warning<
"%0 is used as a header guard here, followed by #define of a different macro">,
InGroup<DiagGroup<"header-guard">>;
@ -797,7 +807,8 @@ def err_pp_double_begin_of_assume_nonnull : Error<
def err_pp_unmatched_end_of_assume_nonnull : Error<
"not currently inside '#pragma clang assume_nonnull'">;
def err_pp_include_in_assume_nonnull : Error<
"cannot #include files inside '#pragma clang assume_nonnull'">;
"cannot %select{#include files|import headers}0 "
"inside '#pragma clang assume_nonnull'">;
def err_pp_eof_in_assume_nonnull : Error<
"'#pragma clang assume_nonnull' was not ended within this file">;

View File

@ -148,8 +148,9 @@ LANGOPT(Blocks , 1, 0, "blocks extension to C")
BENIGN_LANGOPT(EmitAllDecls , 1, 0, "emitting all declarations")
LANGOPT(MathErrno , 1, 1, "errno in math functions")
BENIGN_LANGOPT(HeinousExtensions , 1, 0, "extensions that we really don't like and may be ripped out at any time")
LANGOPT(Modules , 1, 0, "modules extension to C")
COMPATIBLE_LANGOPT(ModulesTS , 1, 0, "C++ Modules TS")
LANGOPT(Modules , 1, 0, "modules semantics")
COMPATIBLE_LANGOPT(ModulesTS , 1, 0, "C++ Modules TS syntax")
COMPATIBLE_LANGOPT(CPlusPlusModules, 1, 0, "C++ modules syntax")
BENIGN_ENUM_LANGOPT(CompilingModule, CompilingModuleKind, 2, CMK_None,
"compiling a module interface")
BENIGN_LANGOPT(CompilingPCH, 1, 0, "building a pch")

View File

@ -265,7 +265,7 @@ public:
/// Do we need to track the owning module for a local declaration?
bool trackLocalOwningModule() const {
return isCompilingModule() || ModulesLocalVisibility || ModulesTS;
return isCompilingModule() || ModulesLocalVisibility;
}
bool isSignedOverflowDefined() const {

View File

@ -827,6 +827,10 @@ ANNOTATION(module_include)
ANNOTATION(module_begin)
ANNOTATION(module_end)
// Annotation for a header_name token that has been looked up and transformed
// into the name of a header unit.
ANNOTATION(header_unit)
#undef ANNOTATION
#undef TESTING_KEYWORD
#undef OBJC_AT_KEYWORD

View File

@ -285,6 +285,84 @@ class Preprocessor {
/// Whether the last token we lexed was an '@'.
bool LastTokenWasAt = false;
/// A position within a C++20 import-seq.
///
/// This is a small state machine that is advanced once per token via the
/// handle*() members. The state is a single integer: a positive value counts
/// the brackets that are currently open, while zero and the negative values
/// describe how far we have progressed through a possible
/// 'export'[opt] 'import' sequence while no bracket is open.
class ImportSeq {
public:
  enum State : int {
    // Positive values represent a number of unclosed brackets.

    /// No bracket is open, but the preceding token does not allow an
    /// 'import' to be recognized here.
    AtTopLevel = 0,
    /// No bracket is open and the preceding token was a ';' or '}' that
    /// ended a top-level token sequence; 'export' or 'import' may follow.
    AfterTopLevelTokenSeq = -1,
    /// Saw 'export' while in AfterTopLevelTokenSeq; 'import' may follow.
    AfterExport = -2,
    /// Saw 'import' while in AfterTopLevelTokenSeq or AfterExport.
    AfterImportSeq = -3,
  };

  // Intentionally implicit: an ImportSeq can be initialized directly from a
  // State value.
  ImportSeq(State S) : S(S) {}

  /// Saw any kind of open bracket.
  ///
  /// Any non-positive state is treated as depth zero before incrementing.
  void handleOpenBracket() {
    S = static_cast<State>(std::max<int>(S, 0) + 1);
  }

  /// Saw any kind of close bracket other than '}'.
  ///
  /// The depth is clamped so that an unmatched close bracket leaves us in
  /// AtTopLevel rather than producing a negative (import-related) state.
  void handleCloseBracket() {
    S = static_cast<State>(std::max<int>(S, 1) - 1);
  }

  /// Saw a close brace.
  ///
  /// Behaves like any other close bracket, except that a brace which brings
  /// us back to depth zero also ends the top-level token sequence, unless we
  /// are still inside the suffix that follows a header-name.
  void handleCloseBrace() {
    handleCloseBracket();
    if (S == AtTopLevel && !AfterHeaderName)
      S = AfterTopLevelTokenSeq;
  }

  /// Saw a semicolon.
  ///
  /// A semicolon outside all brackets ends the top-level token sequence and
  /// any pending header-name suffix; inside brackets it has no effect.
  void handleSemi() {
    if (atTopLevel()) {
      S = AfterTopLevelTokenSeq;
      AfterHeaderName = false;
    }
  }

  /// Saw an 'export' identifier.
  void handleExport() {
    if (S == AfterTopLevelTokenSeq)
      S = AfterExport;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw an 'import' identifier.
  void handleImport() {
    if (S == AfterTopLevelTokenSeq || S == AfterExport)
      S = AfterImportSeq;
    else if (S <= 0)
      S = AtTopLevel;
  }

  /// Saw a 'header-name' token; do not recognize any more 'import' tokens
  /// until we reach a top-level semicolon.
  void handleHeaderName() {
    if (S == AfterImportSeq)
      AfterHeaderName = true;
    handleMisc();
  }

  /// Saw any other token.
  ///
  /// Outside brackets this discards any partial 'export' / 'import' progress;
  /// inside brackets the depth is left untouched.
  void handleMisc() {
    if (S <= 0)
      S = AtTopLevel;
  }

  /// Whether no bracket is currently open.
  bool atTopLevel() const { return S <= 0; }

  /// Whether the most recent token completed an 'import' sequence.
  bool afterImportSeq() const { return S == AfterImportSeq; }

private:
  State S;

  /// Whether we're in the pp-import-suffix following the header-name in a
  /// pp-import. If so, a close-brace is not sufficient to end the
  /// top-level-token-seq of an import-seq.
  bool AfterHeaderName = false;
};
/// Our current position within a C++20 import-seq.
ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
/// Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier = false;
@ -1266,7 +1344,8 @@ public:
/// Lex a token, forming a header-name token if possible.
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
void LexAfterModuleImport(Token &Result);
bool LexAfterModuleImport(Token &Result);
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
void makeModuleVisible(Module *M, SourceLocation Loc);
@ -1813,7 +1892,11 @@ public:
/// If not, emit a diagnostic and consume up until the eod.
/// If \p EnableMacros is true, then we consider macros that expand to zero
/// tokens as being ok.
void CheckEndOfDirective(const char *DirType, bool EnableMacros = false);
///
/// \return The location of the end of the directive (the terminating
/// newline).
SourceLocation CheckEndOfDirective(const char *DirType,
bool EnableMacros = false);
/// Read and discard all tokens remaining on the current line until
/// the tok::eod token is found. Returns the range of the skipped tokens.
@ -2052,7 +2135,7 @@ private:
//===--------------------------------------------------------------------===//
// Caching stuff.
void CachingLex(Token &Result);
void CachingLex(Token &Result, bool &IsNewToken);
bool InCachingLexMode() const {
// If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
@ -2082,12 +2165,31 @@ private:
void HandleMacroPublicDirective(Token &Tok);
void HandleMacroPrivateDirective();
/// Describes what, if anything, the parser still has to be told after a
/// header inclusion or import has been processed.
struct ImportAction {
  /// The kind of follow-up notification required.
  enum ActionKind {
    /// Nothing further needs to be reported.
    None,
    /// The header was entered textually as part of a module.
    ModuleBegin,
    /// The header was treated as an import of a module.
    ModuleImport,
  };

  /// Which notification to produce.
  ActionKind Kind;

  /// The module the notification refers to; may be null only when Kind is
  /// None.
  Module *ModuleForHeader = nullptr;

  /// Build an action of kind \p AK that refers to \p Mod. Every kind other
  /// than None must be given a module.
  ImportAction(ActionKind AK, Module *Mod = nullptr)
      : Kind(AK), ModuleForHeader(Mod) {
    assert((AK == None || Mod) && "no module for module action");
  }
};
// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc,
Token &Tok,
void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
const DirectoryLookup *LookupFrom = nullptr,
const FileEntry *LookupFromFile = nullptr,
bool isImport = false);
const FileEntry *LookupFromFile = nullptr);
ImportAction
HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
Token &FilenameTok, SourceLocation EndLoc,
const DirectoryLookup *LookupFrom = nullptr,
const FileEntry *LookupFromFile = nullptr);
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);

View File

@ -218,7 +218,7 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
if (LangOpts.DeclSpecKeyword)
AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);
// Add the '_experimental_modules_import' contextual keyword.
// Add the 'import' contextual keyword.
get("import").setModulesImport(true);
}

View File

@ -2594,13 +2594,18 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Args.hasFlag(OPT_fdouble_square_bracket_attributes,
OPT_fno_double_square_bracket_attributes, Opts.CPlusPlus11);
Opts.CPlusPlusModules = Opts.CPlusPlus2a;
Opts.ModulesTS = Args.hasArg(OPT_fmodules_ts);
Opts.Modules = Args.hasArg(OPT_fmodules) || Opts.ModulesTS;
Opts.Modules =
Args.hasArg(OPT_fmodules) || Opts.ModulesTS || Opts.CPlusPlusModules;
Opts.ModulesStrictDeclUse = Args.hasArg(OPT_fmodules_strict_decluse);
Opts.ModulesDeclUse =
Args.hasArg(OPT_fmodules_decluse) || Opts.ModulesStrictDeclUse;
// FIXME: We only need this in C++ modules / Modules TS if we might textually
// enter a different module (eg, when building a header unit).
Opts.ModulesLocalVisibility =
Args.hasArg(OPT_fmodules_local_submodule_visibility) || Opts.ModulesTS;
Args.hasArg(OPT_fmodules_local_submodule_visibility) || Opts.ModulesTS ||
Opts.CPlusPlusModules;
Opts.ModulesCodegen = Args.hasArg(OPT_fmodules_codegen);
Opts.ModulesDebugInfo = Args.hasArg(OPT_fmodules_debuginfo);
Opts.ModulesSearchAll = Opts.Modules &&

View File

@ -769,6 +769,15 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
reinterpret_cast<Module *>(Tok.getAnnotationValue()));
PP.Lex(Tok);
continue;
} else if (Tok.is(tok::annot_header_unit)) {
// This is a header-name that has been (effectively) converted into a
// module-name.
// FIXME: The module name could contain non-identifier module name
// components. We don't have a good way to round-trip those.
Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
std::string Name = M->getFullModuleName();
OS.write(Name.data(), Name.size());
Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
} else if (Tok.isAnnotation()) {
// Ignore annotation tokens created by pragmas - the pragmas themselves
// will be reproduced in the preprocessed output.
@ -790,12 +799,12 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
Callbacks->HandleNewlinesInToken(TokPtr, Len);
} else {
std::string S = PP.getSpelling(Tok);
OS.write(&S[0], S.size());
OS.write(S.data(), S.size());
// Tokens that can contain embedded newlines need to adjust our current
// line number.
if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
Callbacks->HandleNewlinesInToken(&S[0], S.size());
Callbacks->HandleNewlinesInToken(S.data(), S.size());
}
Callbacks->setEmittedTokensOnThisLine();

View File

@ -45,7 +45,7 @@ void Preprocessor::Backtrack() {
recomputeCurLexerKind();
}
void Preprocessor::CachingLex(Token &Result) {
void Preprocessor::CachingLex(Token &Result, bool &IsNewToken) {
if (!InCachingLexMode())
return;
@ -55,6 +55,7 @@ void Preprocessor::CachingLex(Token &Result) {
if (CachedLexPos < CachedTokens.size()) {
Result = CachedTokens[CachedLexPos++];
IsNewToken = false;
return;
}

View File

@ -336,7 +336,10 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
///
/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is
/// true, then we consider macros that expand to zero tokens as being ok.
void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) {
///
/// Returns the location of the end of the directive.
SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
bool EnableMacros) {
Token Tmp;
// Lex unexpanded tokens for most directives: macros might expand to zero
// tokens, causing us to miss diagnosing invalid lines. Some directives (like
@ -351,18 +354,19 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) {
while (Tmp.is(tok::comment)) // Skip comments in -C mode.
LexUnexpandedToken(Tmp);
if (Tmp.isNot(tok::eod)) {
// Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
// or if this is a macro-style preprocessing directive, because it is more
// trouble than it is worth to insert /**/ and check that there is no /**/
// in the range also.
FixItHint Hint;
if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
!CurTokenLexer)
Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
DiscardUntilEndOfDirective();
}
if (Tmp.is(tok::eod))
return Tmp.getLocation();
// Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
// or if this is a macro-style preprocessing directive, because it is more
// trouble than it is worth to insert /**/ and check that there is no /**/
// in the range also.
FixItHint Hint;
if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
!CurTokenLexer)
Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
return DiscardUntilEndOfDirective().getEnd();
}
/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
@ -1509,7 +1513,13 @@ static void diagnoseAutoModuleImport(
Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
SourceLocation PathEnd) {
assert(PP.getLangOpts().ObjC && "no import syntax available");
StringRef ImportKeyword;
if (PP.getLangOpts().ObjC)
ImportKeyword = "@import";
else if (PP.getLangOpts().ModulesTS || PP.getLangOpts().CPlusPlusModules)
ImportKeyword = "import";
else
return; // no import syntax available
SmallString<128> PathString;
for (size_t I = 0, N = Path.size(); I != N; ++I) {
@ -1544,8 +1554,8 @@ static void diagnoseAutoModuleImport(
/*IsTokenRange=*/false);
PP.Diag(HashLoc, diag::warn_auto_module_import)
<< IncludeKind << PathString
<< FixItHint::CreateReplacement(ReplaceRange,
("@import " + PathString + ";").str());
<< FixItHint::CreateReplacement(
ReplaceRange, (ImportKeyword + " " + PathString + ";").str());
}
// Given a vector of path components and a string containing the real
@ -1615,8 +1625,7 @@ bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
Token &IncludeTok,
const DirectoryLookup *LookupFrom,
const FileEntry *LookupFromFile,
bool isImport) {
const FileEntry *LookupFromFile) {
Token FilenameTok;
if (LexHeaderName(FilenameTok))
return;
@ -1628,32 +1637,66 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
return;
}
// Verify that there is nothing after the filename, other than EOD. Note
// that we allow macros that expand to nothing after the filename, because
// this falls into the category of "#include pp-tokens new-line" specified
// in C99 6.10.2p4.
SourceLocation EndLoc =
CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
EndLoc, LookupFrom, LookupFromFile);
switch (Action.Kind) {
case ImportAction::None:
break;
case ImportAction::ModuleBegin:
EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
tok::annot_module_begin, Action.ModuleForHeader);
break;
case ImportAction::ModuleImport:
EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
tok::annot_module_include, Action.ModuleForHeader);
break;
}
}
/// Handle either a #include-like directive or an import declaration that names
/// a header file.
///
/// \param HashLoc The location of the '#' token for an include, or
/// SourceLocation() for an import declaration.
/// \param IncludeTok The include / include_next / import token.
/// \param FilenameTok The header-name token.
/// \param EndLoc The location at which any imported macros become visible.
/// \param LookupFrom For #include_next, the starting directory for the
/// directory lookup.
/// \param LookupFromFile For #include_next, the starting file for the directory
/// lookup.
Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
SourceLocation EndLoc, const DirectoryLookup *LookupFrom,
const FileEntry *LookupFromFile) {
SmallString<128> FilenameBuffer;
StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
SourceLocation CharEnd = FilenameTok.getEndLoc();
CharSourceRange FilenameRange
= CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
SourceRange DirectiveRange(HashLoc, FilenameTok.getLocation());
StringRef OriginalFilename = Filename;
bool isAngled =
GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
// If GetIncludeFilenameSpelling set the start ptr to null, there was an
// error.
if (Filename.empty()) {
DiscardUntilEndOfDirective();
return;
}
if (Filename.empty())
return {ImportAction::None};
// Verify that there is nothing after the filename, other than EOD. Note that
// we allow macros that expand to nothing after the filename, because this
// falls into the category of "#include pp-tokens new-line" specified in
// C99 6.10.2p4.
CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
bool IsImportDecl = HashLoc.isInvalid();
SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
// Complain about attempts to #include files in an audit pragma.
if (PragmaARCCFCodeAuditedLoc.isValid()) {
Diag(HashLoc, diag::err_pp_include_in_arc_cf_code_audited);
Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
Diag(PragmaARCCFCodeAuditedLoc, diag::note_pragma_entered_here);
// Immediately leave the pragma.
@ -1662,7 +1705,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// Complain about attempts to #include files in an assume-nonnull pragma.
if (PragmaAssumeNonNullLoc.isValid()) {
Diag(HashLoc, diag::err_pp_include_in_assume_nonnull);
Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
// Immediately leave the pragma.
@ -1737,7 +1780,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (File) {
Diag(FilenameTok,
diag::err_pp_file_not_found_angled_include_not_fatal)
<< Filename
<< Filename << IsImportDecl
<< FixItHint::CreateReplacement(FilenameRange,
"\"" + Filename.str() + "\"");
}
@ -1810,7 +1853,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
if (isPCHThroughHeader(File))
SkippingUntilPCHThroughHeader = false;
return;
return {ImportAction::None};
}
// Should we enter the source file? Set to Skip if either the source file is
@ -1845,7 +1888,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
Diag(FilenameTok.getLocation(),
diag::note_implicit_top_level_module_import_here)
<< SuggestedModule.getModule()->getTopLevelModuleName();
return;
return {ImportAction::None};
}
// Compute the module access path corresponding to this module.
@ -1858,9 +1901,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
std::reverse(Path.begin(), Path.end());
// Warn that we're replacing the include/import with a module import.
// We only do this in Objective-C, where we have a module-import syntax.
if (getLangOpts().ObjC)
diagnoseAutoModuleImport(*this, HashLoc, IncludeTok, Path, CharEnd);
if (!IsImportDecl)
diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
// Load the module to import its macros. We'll make the declarations
// visible when the parser gets here.
@ -1893,7 +1935,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
CurLexer->cutOffLexing();
}
return;
return {ImportAction::None};
}
}
@ -1905,10 +1947,19 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (File)
FileCharacter = std::max(HeaderInfo.getFileDirFlavor(File), FileCharacter);
// If this is a '#import' or an import-declaration, don't re-enter the file.
//
// FIXME: If we have a suggested module for a '#include', and we've already
// visited this file, don't bother entering it again. We know it has no
// further effect.
bool EnterOnce =
IsImportDecl ||
IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
// Ask HeaderInfo if we should enter this #include file. If not, #including
// this file will have no effect.
if (Action == Enter && File &&
!HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
!HeaderInfo.ShouldEnterIncludeFile(*this, File, EnterOnce,
getLangOpts().Modules,
SuggestedModule.getModule())) {
// Even if we've already preprocessed this header once and know that we
@ -1921,8 +1972,9 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
}
if (Callbacks) {
if (Callbacks && !IsImportDecl) {
// Notify the callback object that we've seen an inclusion directive.
// FIXME: Use a different callback for a pp-import?
Callbacks->InclusionDirective(
HashLoc, IncludeTok,
LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled,
@ -1934,10 +1986,15 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
}
if (!File)
return;
return {ImportAction::None};
// FIXME: If we have a suggested module, and we've already visited this file,
// don't bother entering it again. We know it has no further effect.
// If this is a C++20 pp-import declaration, diagnose if we didn't find any
// module corresponding to the named header.
if (IsImportDecl && !SuggestedModule) {
Diag(FilenameTok, diag::err_header_import_not_header_unit)
<< OriginalFilename << File->getName();
return {ImportAction::None};
}
// Issue a diagnostic if the name of the file on disk has a different case
// than the one we're about to open.
@ -1977,24 +2034,25 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
switch (Action) {
case Skip:
// If we don't need to enter the file, stop now.
return;
return {ImportAction::None};
case IncludeLimitReached:
// If we reached our include limit and don't want to enter any more files,
// don't go any further.
return;
return {ImportAction::None};
case Import: {
// If this is a module import, make it visible if needed.
Module *M = SuggestedModule.getModule();
assert(M && "no module to import");
makeModuleVisible(M, HashLoc);
makeModuleVisible(M, EndLoc);
if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
tok::pp___include_macros)
EnterAnnotationToken(DirectiveRange, tok::annot_module_include, M);
return;
return {ImportAction::None};
return {ImportAction::ModuleImport, M};
}
case Enter:
@ -2005,7 +2063,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
Diag(FilenameTok, diag::err_pp_include_too_deep);
HasReachedMaxIncludeDepth = true;
return;
return {ImportAction::None};
}
// Look up the file, create a File ID for it.
@ -2019,7 +2077,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// If all is good, enter the new file!
if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation()))
return;
return {ImportAction::None};
// Determine if we're switching to building a new submodule, and which one.
if (auto *M = SuggestedModule.getModule()) {
@ -2030,7 +2088,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
<< M->getFullModuleName();
Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
diag::note_previous_definition);
return;
return {ImportAction::None};
}
// When building a pch, -fmodule-name tells the compiler to textually
// include headers in the specified module. We are not building the
@ -2043,21 +2101,23 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (getLangOpts().CompilingPCH &&
isForModuleBuilding(M, getLangOpts().CurrentModule,
getLangOpts().ModuleName))
return;
return {ImportAction::None};
assert(!CurLexerSubmodule && "should not have marked this as a module yet");
CurLexerSubmodule = M;
// Let the macro handling code know that any future macros are within
// the new submodule.
EnterSubmodule(M, HashLoc, /*ForPragma*/false);
EnterSubmodule(M, EndLoc, /*ForPragma*/false);
// Let the parser know that any future declarations are within the new
// submodule.
// FIXME: There's no point doing this if we're handling a #__include_macros
// directive.
EnterAnnotationToken(DirectiveRange, tok::annot_module_begin, M);
return {ImportAction::ModuleBegin, M};
}
return {ImportAction::None};
}
/// HandleIncludeNextDirective - Implements \#include_next.
@ -2122,7 +2182,7 @@ void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
return HandleMicrosoftImportDirective(ImportTok);
Diag(ImportTok, diag::ext_pp_import_directive);
}
return HandleIncludeDirective(HashLoc, ImportTok, nullptr, nullptr, true);
return HandleIncludeDirective(HashLoc, ImportTok);
}
/// HandleIncludeMacrosDirective - The -imacros command line option turns into a

View File

@ -866,6 +866,7 @@ void Preprocessor::Lex(Token &Result) {
// We loop here until a lex function returns a token; this avoids recursion.
bool ReturnedToken;
bool IsNewToken = true;
do {
switch (CurLexerKind) {
case CLK_Lexer:
@ -875,12 +876,11 @@ void Preprocessor::Lex(Token &Result) {
ReturnedToken = CurTokenLexer->Lex(Result);
break;
case CLK_CachingLexer:
CachingLex(Result);
CachingLex(Result, IsNewToken);
ReturnedToken = true;
break;
case CLK_LexAfterModuleImport:
LexAfterModuleImport(Result);
ReturnedToken = true;
ReturnedToken = LexAfterModuleImport(Result);
break;
}
} while (!ReturnedToken);
@ -894,6 +894,47 @@ void Preprocessor::Lex(Token &Result) {
Result.setIdentifierInfo(nullptr);
}
// Update ImportSeqState to track our position within a C++20 import-seq
// if this token is being produced as a result of phase 4 of translation.
if (getLangOpts().CPlusPlusModules && LexLevel == 1 && IsNewToken) {
switch (Result.getKind()) {
case tok::l_paren: case tok::l_square: case tok::l_brace:
ImportSeqState.handleOpenBracket();
break;
case tok::r_paren: case tok::r_square:
ImportSeqState.handleCloseBracket();
break;
case tok::r_brace:
ImportSeqState.handleCloseBrace();
break;
case tok::semi:
ImportSeqState.handleSemi();
break;
case tok::header_name:
case tok::annot_header_unit:
ImportSeqState.handleHeaderName();
break;
case tok::kw_export:
ImportSeqState.handleExport();
break;
case tok::identifier:
if (Result.getIdentifierInfo()->isModulesImport()) {
ImportSeqState.handleImport();
if (ImportSeqState.afterImportSeq()) {
ModuleImportLoc = Result.getLocation();
ModuleImportPath.clear();
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
}
break;
}
LLVM_FALLTHROUGH;
default:
ImportSeqState.handleMisc();
break;
}
}
LastTokenWasAt = Result.is(tok::at);
--LexLevel;
}
@ -902,8 +943,8 @@ void Preprocessor::Lex(Token &Result) {
/// \p AllowConcatenation is \c true).
///
/// \param FilenameTok Filled in with the next token. On success, this will
/// be either an angle_header_name or a string_literal token. On
/// failure, it will be whatever other token was found instead.
/// be a header_name token. On failure, it will be whatever other
/// token was found instead.
/// \param AllowMacroExpansion If \c true, allow the header name to be formed
/// by macro expansion (concatenating tokens as necessary if the first
/// token is a '<').
@ -921,6 +962,10 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
// case, glue the tokens together into a header_name token.
SmallString<128> FilenameBuffer;
if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
bool StartOfLine = FilenameTok.isAtStartOfLine();
bool LeadingSpace = FilenameTok.hasLeadingSpace();
bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
SourceLocation Start = FilenameTok.getLocation();
SourceLocation End;
FilenameBuffer.push_back('<');
@ -970,6 +1015,9 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
FilenameTok.startToken();
FilenameTok.setKind(tok::header_name);
FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
CreateString(FilenameBuffer, FilenameTok, Start, End);
} else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
// Convert a string-literal token of the form " h-char-sequence "
@ -990,14 +1038,148 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
return false;
}
/// Gather the tokens that make up a C++20 pp-import-suffix.
///
/// Tokens are lexed one at a time and appended to \p Toks. Collection stops
/// once the appended token is a ';' or a closing bracket seen while no bracket
/// opened during this call is still pending, or once it is an eof token. The
/// token that stopped the collection is left as the last element of \p Toks.
void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
  // FIXME: For error recovery, consider recognizing attribute syntax here
  // and terminating / diagnosing a missing semicolon if we find anything
  // else? (Can we leave that to the parser?)
  unsigned OpenBrackets = 0;
  for (;;) {
    // Lex straight into a fresh slot at the end of the output vector.
    Toks.emplace_back();
    Token &Tok = Toks.back();
    Lex(Tok);

    const auto Kind = Tok.getKind();

    // Running out of input always ends the suffix.
    if (Kind == tok::eof)
      return;

    if (Kind == tok::l_paren || Kind == tok::l_square ||
        Kind == tok::l_brace) {
      ++OpenBrackets;
      continue;
    }

    if (Kind == tok::r_paren || Kind == tok::r_square ||
        Kind == tok::r_brace) {
      // A closer with nothing left to close ends the suffix.
      if (OpenBrackets == 0)
        return;
      --OpenBrackets;
      continue;
    }

    // Only a semicolon outside all brackets terminates the suffix.
    if (Kind == tok::semi && OpenBrackets == 0)
      return;
  }
}
/// Lex a token following the 'import' contextual keyword.
///
void Preprocessor::LexAfterModuleImport(Token &Result) {
/// pp-import: [C++20]
/// import header-name pp-import-suffix[opt] ;
/// import header-name-tokens pp-import-suffix[opt] ;
/// [ObjC] @ import module-name ;
/// [Clang] import module-name ;
///
/// header-name-tokens:
/// string-literal
/// < [any sequence of preprocessing-tokens other than >] >
///
/// module-name:
/// module-name-qualifier[opt] identifier
///
/// module-name-qualifier
/// module-name-qualifier[opt] identifier .
///
/// We respond to a pp-import by importing macros from the named module.
bool Preprocessor::LexAfterModuleImport(Token &Result) {
// Figure out what kind of lexer we actually have.
recomputeCurLexerKind();
// Lex the next token.
Lex(Result);
// Lex the next token. The header-name lexing rules are used at the start of
// a pp-import.
//
// For now, we only support header-name imports in C++20 mode.
// FIXME: Should we allow this in all language modes that support an import
// declaration as an extension?
if (ModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
if (LexHeaderName(Result))
return true;
} else {
Lex(Result);
}
// Allocate a holding buffer for a sequence of tokens and introduce it into
// the token stream.
auto EnterTokens = [this](ArrayRef<Token> Toks) {
auto ToksCopy = llvm::make_unique<Token[]>(Toks.size());
std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
EnterTokenStream(std::move(ToksCopy), Toks.size(),
/*DisableMacroExpansion*/ true);
};
// Check for a header-name.
SmallVector<Token, 32> Suffix;
if (Result.is(tok::header_name)) {
// Enter the header-name token into the token stream; a Lex action cannot
// both return a token and cache tokens (doing so would corrupt the token
// cache if the call to Lex comes from CachingLex / PeekAhead).
Suffix.push_back(Result);
// Consume the pp-import-suffix and expand any macros in it now. We'll add
// it back into the token stream later.
CollectPpImportSuffix(Suffix);
if (Suffix.back().isNot(tok::semi)) {
// This is not a pp-import after all.
EnterTokens(Suffix);
return false;
}
// C++2a [cpp.module]p1:
// The ';' preprocessing-token terminating a pp-import shall not have
// been produced by macro replacement.
SourceLocation SemiLoc = Suffix.back().getLocation();
if (SemiLoc.isMacroID())
Diag(SemiLoc, diag::err_header_import_semi_in_macro);
// Reconstitute the import token.
Token ImportTok;
ImportTok.startToken();
ImportTok.setKind(tok::kw_import);
ImportTok.setLocation(ModuleImportLoc);
ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
ImportTok.setLength(6);
auto Action = HandleHeaderIncludeOrImport(
/*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
switch (Action.Kind) {
case ImportAction::None:
break;
case ImportAction::ModuleBegin:
// Let the parser know we're textually entering the module.
Suffix.emplace_back();
Suffix.back().startToken();
Suffix.back().setKind(tok::annot_module_begin);
Suffix.back().setLocation(SemiLoc);
Suffix.back().setAnnotationEndLoc(SemiLoc);
Suffix.back().setAnnotationValue(Action.ModuleForHeader);
LLVM_FALLTHROUGH;
case ImportAction::ModuleImport:
// We chose to import (or textually enter) the file. Convert the
// header-name token into a header unit annotation token.
Suffix[0].setKind(tok::annot_header_unit);
Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
Suffix[0].setAnnotationValue(Action.ModuleForHeader);
// FIXME: Call the moduleImport callback?
break;
}
EnterTokens(Suffix);
return false;
}
// The token sequence
//
@ -1012,7 +1194,7 @@ void Preprocessor::LexAfterModuleImport(Token &Result) {
Result.getLocation()));
ModuleImportExpectsIdentifier = false;
CurLexerKind = CLK_LexAfterModuleImport;
return;
return true;
}
// If we're expecting a '.' or a ';', and we got a '.', then wait until we
@ -1021,40 +1203,61 @@ void Preprocessor::LexAfterModuleImport(Token &Result) {
if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
return;
return true;
}
// If we have a non-empty module path, load the named module.
if (!ModuleImportPath.empty()) {
// Under the Modules TS, the dot is just part of the module name, and not
// a real hierarchy separator. Flatten such module names now.
//
// FIXME: Is this the right level to be performing this transformation?
std::string FlatModuleName;
if (getLangOpts().ModulesTS) {
for (auto &Piece : ModuleImportPath) {
if (!FlatModuleName.empty())
FlatModuleName += ".";
FlatModuleName += Piece.first->getName();
}
SourceLocation FirstPathLoc = ModuleImportPath[0].second;
ModuleImportPath.clear();
ModuleImportPath.push_back(
std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
}
// If we didn't recognize a module name at all, this is not a (valid) import.
if (ModuleImportPath.empty() || Result.is(tok::eof))
return true;
Module *Imported = nullptr;
if (getLangOpts().Modules) {
Imported = TheModuleLoader.loadModule(ModuleImportLoc,
ModuleImportPath,
Module::Hidden,
/*IsIncludeDirective=*/false);
if (Imported)
makeModuleVisible(Imported, ModuleImportLoc);
// Consume the pp-import-suffix and expand any macros in it now, if we're not
// at the semicolon already.
SourceLocation SemiLoc = Result.getLocation();
if (Result.isNot(tok::semi)) {
Suffix.push_back(Result);
CollectPpImportSuffix(Suffix);
if (Suffix.back().isNot(tok::semi)) {
// This is not an import after all.
EnterTokens(Suffix);
return false;
}
if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
SemiLoc = Suffix.back().getLocation();
}
// Under the Modules TS, the dot is just part of the module name, and not
// a real hierarchy separator. Flatten such module names now.
//
// FIXME: Is this the right level to be performing this transformation?
std::string FlatModuleName;
if (getLangOpts().ModulesTS || getLangOpts().CPlusPlusModules) {
for (auto &Piece : ModuleImportPath) {
if (!FlatModuleName.empty())
FlatModuleName += ".";
FlatModuleName += Piece.first->getName();
}
SourceLocation FirstPathLoc = ModuleImportPath[0].second;
ModuleImportPath.clear();
ModuleImportPath.push_back(
std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
}
Module *Imported = nullptr;
if (getLangOpts().Modules) {
Imported = TheModuleLoader.loadModule(ModuleImportLoc,
ModuleImportPath,
Module::Hidden,
/*IsIncludeDirective=*/false);
if (Imported)
makeModuleVisible(Imported, SemiLoc);
}
if (Callbacks)
Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
if (!Suffix.empty()) {
EnterTokens(Suffix);
return false;
}
return true;
}
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {

View File

@ -160,6 +160,11 @@ static char GetFirstChar(const Preprocessor &PP, const Token &Tok) {
bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
const Token &PrevTok,
const Token &Tok) const {
// Conservatively assume that every annotation token that has a printable
// form requires whitespace.
if (PrevTok.isAnnotation())
return true;
// First, check to see if the tokens were directly adjacent in the original
// source. If they were, it must be okay to stick them together: if there
// were an issue, the tokens would have been lexed differently.

View File

@ -0,0 +1 @@
#define ATTRS [[ ]]

View File

@ -0,0 +1,18 @@
// RUN: %clang_cc1 -std=c++2a -emit-header-module -fmodule-name=attrs -x c++-header %S/Inputs/empty.h %S/Inputs/attrs.h -o %t.pcm
// RUN: %clang_cc1 -std=c++2a %s -fmodule-file=%t.pcm -E -verify -I%S/Inputs | FileCheck %s
#define SEMI ;
// expected-error@+1 {{semicolon terminating header import declaration cannot be produced by a macro}}
import "empty.h" SEMI // CHECK: import attrs.{{.*}};
#define IMPORT import "empty.h"
IMPORT; // CHECK: import attrs.{{.*}};
#define IMPORT_ANGLED import <empty.h>
IMPORT_ANGLED; // CHECK: import attrs.{{.*}};
// Ensure that macros only become visible at the semicolon.
// CHECK: import attrs.{{.*}} ATTRS ;
import "attrs.h" ATTRS ;
// CHECK: {{\[\[}} ]] int n;
ATTRS int n;

View File

@ -0,0 +1,33 @@
// RUN: %clang_cc1 -std=c++2a -emit-header-module -fmodule-name=attrs -x c++-header %S/Inputs/empty.h %S/Inputs/attrs.h -o %t.pcm
// RUN: %clang_cc1 -std=c++2a %s -fmodule-file=%t.pcm -fsyntax-only -verify -I%S/Inputs
template<int> struct import; // expected-note 2{{previous}}
constexpr struct { int h; } empty = {0};
struct A;
struct B;
struct C;
template<> struct import<0> {
static A a;
static B b;
static C c;
};
// OK, not an import-declaration.
// FIXME: This is valid, see PR41192
struct A {} // FIXME expected-error {{expected ';'}}
::import
<empty.h>::a; // FIXME expected-error {{requires a type specifier}}
// This is invalid: the tokens after 'import' are a header-name, so cannot be
// parsed as a template-argument-list.
struct B {}
import // expected-error {{redefinition of 'import'}} expected-error {{expected ';'}}
<empty.h>::b; // (error recovery skips these tokens)
// Likewise, this is ill-formed after the tokens are reconstituted into a
// header-name token.
struct C {}
import // expected-error {{redefinition of 'import'}} expected-error {{expected ';'}}
<
empty.h // (error recovery skips these tokens)
>::c;

View File

@ -0,0 +1 @@
#error ERROR: This file should never actually be included

View File

@ -0,0 +1 @@
#error ERROR: This file should never actually be included

View File

@ -0,0 +1,81 @@
// RUN: not %clang_cc1 -std=c++2a -E -I%S/Inputs %s -o - | FileCheck %s --strict-whitespace --implicit-check-not=ERROR
// Check for context-sensitive header-name token formation.
// CHECK: import <foo bar>;
import <foo bar>;
// Not at the top level: these are each 8 tokens rather than 5.
// CHECK: { import <foo bar>; }
{ import <foo bar>; }
// CHECK: ( import <foo bar>; :>
( import <foo bar>; :>
// CHECK: [ import <foo bar>; %>
[ import <foo bar>; %>
// CHECK: import <foo bar>;
import <foo bar>;
// CHECK: foo; import <foo bar>;
foo; import <foo bar>;
// CHECK: foo import <foo bar>;
foo import <foo bar>;
// CHECK: import <foo bar> {{\[\[ ]]}};
import <foo bar> [[ ]];
// CHECK: import <foo bar> import <foo bar>;
import <foo bar> import <foo bar>;
// FIXME: We do not form header-name tokens in the pp-import-suffix of a
// pp-import. Conforming programs can't tell the difference.
// CHECK: import <foo bar> {} import <foo bar>;
// FIXME: import <foo bar> {} import <foo bar>;
import <foo bar> {} import <foo bar>;
// CHECK: export import <foo bar>;
export import <foo bar>;
// CHECK: export export import <foo bar>;
export export import <foo bar>;
#define UNBALANCED_PAREN (
// CHECK: import <foo bar>;
import <foo bar>;
UNBALANCED_PAREN
// CHECK: import <foo bar>;
import <foo bar>;
)
_Pragma("clang no_such_pragma (");
// CHECK: import <foo bar>;
import <foo bar>;
#define HEADER <foo bar>
// CHECK: import <foo bar>;
import HEADER;
// CHECK: import <foo bar>;
import <
foo
bar
>;
// CHECK: import{{$}}
// CHECK: {{^}}<foo bar>;
import
<
foo
bar
>;
// CHECK: import{{$}}
// CHECK: {{^}}<foo bar>;
import
<foo bar>;
#define IMPORT import <foo bar>
// CHECK: import <foo bar>;
IMPORT;

View File

@ -7,10 +7,10 @@
// Sanity check that we won't somehow find non-canonical module names or
// modules where we shouldn't search the framework.
// RUN: echo '@import NameInModMap' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// RUN: echo '@import NameInDir' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// RUN: echo '@import NameInImport' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// RUN: echo '@import NameInImportInferred' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// RUN: echo '@import NameInModMap;' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// RUN: echo '@import NameInDir;' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// RUN: echo '@import NameInImport;' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// RUN: echo '@import NameInImportInferred;' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
// CHECK: module '{{.*}}' not found
// FIXME: We might want to someday lock down framework modules so that these

View File

@ -21,7 +21,7 @@
// CHECK: Language options:
// CHECK: C99: Yes
// CHECK: Objective-C: Yes
// CHECK: modules extension to C: Yes
// CHECK: modules semantics: Yes
// CHECK: Module features:
// CHECK: myfeature