Auto merge of #103898 - Nilstrieb:match-macro, r=nnethercote

Retry failed macro matching for diagnostics

When a declarative macro fails to match, retry the matching to collect diagnostic info instead of collecting it on the fly in the hot path. Split out of #103439.

You made a bunch of changes to declarative macro matching, so
r? `@nnethercote`

This change should produce a few small perf wins: https://github.com/rust-lang/rust/pull/103439#issuecomment-1294249602
This commit is contained in:
bors 2022-11-11 04:54:59 +00:00
commit b7b7f2716e
3 changed files with 290 additions and 144 deletions

View File

@ -52,7 +52,7 @@ impl KleeneToken {
/// A Kleene-style [repetition operator](https://en.wikipedia.org/wiki/Kleene_star)
/// for token sequences.
#[derive(Clone, PartialEq, Encodable, Decodable, Debug, Copy)]
enum KleeneOp {
pub(crate) enum KleeneOp {
/// Kleene star (`*`) for zero or more repetitions
ZeroOrMore,
/// Kleene plus (`+`) for one or more repetitions

View File

@ -73,17 +73,17 @@
pub(crate) use NamedMatch::*;
pub(crate) use ParseResult::*;
use crate::mbe::{KleeneOp, TokenTree};
use crate::mbe::{macro_rules::Tracker, KleeneOp, TokenTree};
use rustc_ast::token::{self, DocComment, Nonterminal, NonterminalKind, Token};
use rustc_lint_defs::pluralize;
use rustc_parse::parser::{NtOrTt, Parser};
use rustc_span::symbol::MacroRulesNormalizedIdent;
use rustc_span::Span;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_errors::ErrorGuaranteed;
use rustc_lint_defs::pluralize;
use rustc_parse::parser::{NtOrTt, Parser};
use rustc_span::symbol::Ident;
use rustc_span::symbol::MacroRulesNormalizedIdent;
use rustc_span::Span;
use std::borrow::Cow;
use std::collections::hash_map::Entry::{Occupied, Vacant};
@ -96,7 +96,8 @@ use std::collections::hash_map::Entry::{Occupied, Vacant};
///
/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
/// simply incrementing the current matcher position index by one.
pub(super) enum MatcherLoc {
#[derive(Debug)]
pub(crate) enum MatcherLoc {
Token {
token: Token,
},
@ -270,13 +271,17 @@ pub(crate) enum ParseResult<T> {
Failure(Token, &'static str),
/// Fatal error (malformed macro?). Abort compilation.
Error(rustc_span::Span, String),
ErrorReported,
ErrorReported(ErrorGuaranteed),
}
/// A `ParseResult` where the `Success` variant contains a mapping of
/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping
/// of metavars to the token trees they bind to.
pub(crate) type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, NamedMatch>>;
pub(crate) type NamedParseResult = ParseResult<NamedMatches>;
/// Contains a mapping of `MacroRulesNormalizedIdent`s to `NamedMatch`es.
/// This represents the mapping of metavars to the token trees they bind to.
pub(crate) type NamedMatches = FxHashMap<MacroRulesNormalizedIdent, NamedMatch>;
/// Count how many metavars declarations are in `matcher`.
pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
@ -400,17 +405,21 @@ impl TtParser {
///
/// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
/// track of through the mps generated.
fn parse_tt_inner(
fn parse_tt_inner<'matcher, T: Tracker<'matcher>>(
&mut self,
matcher: &[MatcherLoc],
matcher: &'matcher [MatcherLoc],
token: &Token,
track: &mut T,
) -> Option<NamedParseResult> {
// Matcher positions that would be valid if the macro invocation was over now. Only
// modified if `token == Eof`.
let mut eof_mps = EofMatcherPositions::None;
while let Some(mut mp) = self.cur_mps.pop() {
match &matcher[mp.idx] {
let matcher_loc = &matcher[mp.idx];
track.before_match_loc(self, matcher_loc);
match matcher_loc {
MatcherLoc::Token { token: t } => {
// If it's a doc comment, we just ignore it and move on to the next tt in the
// matcher. This is a bug, but #95267 showed that existing programs rely on
@ -450,7 +459,7 @@ impl TtParser {
// Try zero matches of this sequence, by skipping over it.
self.cur_mps.push(MatcherPos {
idx: idx_first_after,
matches: mp.matches.clone(), // a cheap clone
matches: Lrc::clone(&mp.matches),
});
}
@ -464,7 +473,7 @@ impl TtParser {
// processed next time around the loop.
let ending_mp = MatcherPos {
idx: mp.idx + 1, // +1 skips the Kleene op
matches: mp.matches.clone(), // a cheap clone
matches: Lrc::clone(&mp.matches),
};
self.cur_mps.push(ending_mp);
@ -480,7 +489,7 @@ impl TtParser {
// will fail quietly when it is processed next time around the loop.
let ending_mp = MatcherPos {
idx: mp.idx + 2, // +2 skips the separator and the Kleene op
matches: mp.matches.clone(), // a cheap clone
matches: Lrc::clone(&mp.matches),
};
self.cur_mps.push(ending_mp);
@ -552,10 +561,11 @@ impl TtParser {
}
/// Match the token stream from `parser` against `matcher`.
pub(super) fn parse_tt(
pub(super) fn parse_tt<'matcher, T: Tracker<'matcher>>(
&mut self,
parser: &mut Cow<'_, Parser<'_>>,
matcher: &[MatcherLoc],
matcher: &'matcher [MatcherLoc],
track: &mut T,
) -> NamedParseResult {
// A queue of possible matcher positions. We initialize it with the matcher position in
// which the "dot" is before the first token of the first token tree in `matcher`.
@ -571,7 +581,8 @@ impl TtParser {
// Process `cur_mps` until either we have finished the input or we need to get some
// parsing from the black-box parser done.
if let Some(res) = self.parse_tt_inner(matcher, &parser.token) {
let res = self.parse_tt_inner(matcher, &parser.token, track);
if let Some(res) = res {
return res;
}
@ -612,14 +623,14 @@ impl TtParser {
// edition-specific matching behavior for non-terminals.
let nt = match parser.to_mut().parse_nonterminal(kind) {
Err(mut err) => {
err.span_label(
let guarantee = err.span_label(
span,
format!(
"while parsing argument for this `{kind}` macro fragment"
),
)
.emit();
return ErrorReported;
return ErrorReported(guarantee);
}
Ok(nt) => nt,
};

View File

@ -14,7 +14,9 @@ use rustc_ast::{NodeId, DUMMY_NODE_ID};
use rustc_ast_pretty::pprust;
use rustc_attr::{self as attr, TransparencyError};
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, DiagnosticMessage};
use rustc_errors::{
Applicability, Diagnostic, DiagnosticBuilder, DiagnosticMessage, ErrorGuaranteed,
};
use rustc_feature::Features;
use rustc_lint_defs::builtin::{
RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
@ -33,6 +35,8 @@ use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::{mem, slice};
use super::macro_parser::{NamedMatches, NamedParseResult};
pub(crate) struct ParserAnyMacro<'a> {
parser: Parser<'a>,
@ -205,8 +209,32 @@ fn trace_macros_note(cx_expansions: &mut FxIndexMap<Span, Vec<String>>, sp: Span
cx_expansions.entry(sp).or_default().push(message);
}
/// Hooks that are invoked while a macro invocation is matched against the arms of a
/// declarative macro. An implementation decides how much information it records.
pub(super) trait Tracker<'matcher> {
/// This is called before trying to match the next `MatcherLoc` on the current token.
fn before_match_loc(&mut self, parser: &TtParser, matcher: &'matcher MatcherLoc);
/// This is called after an arm has been parsed, either successfully or unsuccessfully. When this is called,
/// `before_match_loc` was called at least once (with a `MatcherLoc::Eof`).
fn after_arm(&mut self, result: &NamedParseResult);
/// A short, static name for this tracker. For tracing.
fn description() -> &'static str;
}
/// A noop tracker that is used in the hot path of the expansion, has zero overhead thanks to monomorphization.
/// Every hook has an empty body, so nothing is recorded.
struct NoopTracker;
impl<'matcher> Tracker<'matcher> for NoopTracker {
// Intentionally empty: nothing is tracked per matcher location.
fn before_match_loc(&mut self, _: &TtParser, _: &'matcher MatcherLoc) {}
// Intentionally empty: the result of an arm is ignored.
fn after_arm(&mut self, _: &NamedParseResult) {}
// Name reported for tracing.
fn description() -> &'static str {
"none"
}
}
/// Expands the rules based macro defined by `lhses` and `rhses` for a given
/// input `arg`.
#[instrument(skip(cx, transparency, arg, lhses, rhses))]
fn expand_macro<'cx>(
cx: &'cx mut ExtCtxt<'_>,
sp: Span,
@ -228,46 +256,11 @@ fn expand_macro<'cx>(
trace_macros_note(&mut cx.expansions, sp, msg);
}
// Which arm's failure should we report? (the one furthest along)
let mut best_failure: Option<(Token, &str)> = None;
// We create a base parser that can be used for the "black box" parts.
// Every iteration needs a fresh copy of that parser. However, the parser
// is not mutated on many of the iterations, particularly when dealing with
// macros like this:
//
// macro_rules! foo {
// ("a") => (A);
// ("b") => (B);
// ("c") => (C);
// // ... etc. (maybe hundreds more)
// }
//
// as seen in the `html5ever` benchmark. We use a `Cow` so that the base
// parser is only cloned when necessary (upon mutation). Furthermore, we
// reinitialize the `Cow` with the base parser at the start of every
// iteration, so that any mutated parsers are not reused. This is all quite
// hacky, but speeds up the `html5ever` benchmark significantly. (Issue
// 68836 suggests a more comprehensive but more complex change to deal with
// this situation.)
// FIXME(Nilstrieb): Stop recovery from happening on this parser and retry later with recovery if the macro failed to match.
let parser = parser_from_cx(sess, arg.clone());
// Try each arm's matchers.
let mut tt_parser = TtParser::new(name);
for (i, lhs) in lhses.iter().enumerate() {
// Take a snapshot of the state of pre-expansion gating at this point.
// This is used so that if a matcher is not `Success(..)`ful,
// then the spans which became gated when parsing the unsuccessful matcher
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());
match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
Success(named_matches) => {
// The matcher was `Success(..)`ful.
// Merge the gated spans from parsing the matcher with the pre-existing ones.
sess.gated_spans.merge(gated_spans_snapshot);
// Track nothing for the best performance.
let try_success_result = try_match_macro(sess, name, &arg, lhses, &mut NoopTracker);
match try_success_result {
Ok((i, named_matches)) => {
let (rhs, rhs_span): (&mbe::Delimited, DelimSpan) = match &rhses[i] {
mbe::TokenTree::Delimited(span, delimited) => (&delimited, *span),
_ => cx.span_bug(sp, "malformed macro rhs"),
@ -324,37 +317,49 @@ fn expand_macro<'cx>(
is_local,
});
}
Failure(token, msg) => match best_failure {
Some((ref best_token, _)) if best_token.span.lo() >= token.span.lo() => {}
_ => best_failure = Some((token, msg)),
},
Error(err_sp, ref msg) => {
let span = err_sp.substitute_dummy(sp);
cx.struct_span_err(span, &msg).emit();
return DummyResult::any(span);
Err(CanRetry::No(_)) => {
debug!("Will not retry matching as an error was emitted already");
return DummyResult::any(sp);
}
Err(CanRetry::Yes) => {
// Retry and emit a better error below.
}
ErrorReported => return DummyResult::any(sp),
}
// The matcher was not `Success(..)`ful.
// Restore to the state before snapshotting and maybe try again.
mem::swap(&mut gated_spans_snapshot, &mut sess.gated_spans.spans.borrow_mut());
}
drop(parser);
// An error occurred, try the expansion again, tracking the expansion closely for better diagnostics.
let mut tracker = CollectTrackerAndEmitter::new(cx, sp);
let try_success_result = try_match_macro(sess, name, &arg, lhses, &mut tracker);
assert!(try_success_result.is_err(), "Macro matching returned a success on the second try");
if let Some(result) = tracker.result {
// An irrecoverable error occurred and has been emitted.
return result;
}
let Some((token, label)) = tracker.best_failure else {
return tracker.result.expect("must have encountered Error or ErrorReported");
};
let (token, label) = best_failure.expect("ran no matchers");
let span = token.span.substitute_dummy(sp);
let mut err = cx.struct_span_err(span, &parse_failure_msg(&token));
err.span_label(span, label);
if !def_span.is_dummy() && !cx.source_map().is_imported(def_span) {
err.span_label(cx.source_map().guess_head_span(def_span), "when calling this macro");
}
annotate_doc_comment(&mut err, sess.source_map(), span);
// Check whether there's a missing comma in this macro call, like `println!("{}" a);`
if let Some((arg, comma_span)) = arg.add_comma() {
for lhs in lhses {
let parser = parser_from_cx(sess, arg.clone());
if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
let mut tt_parser = TtParser::new(name);
if let Success(_) =
tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs, &mut NoopTracker)
{
if comma_span.is_dummy() {
err.note("you might be missing a comma");
} else {
@ -373,6 +378,135 @@ fn expand_macro<'cx>(
DummyResult::any(sp)
}
/// The tracker used for the slow error path that collects useful info for diagnostics.
struct CollectTrackerAndEmitter<'a, 'cx> {
/// Expansion context; used to emit an error when an arm reports `Error`.
cx: &'a mut ExtCtxt<'cx>,
/// Which arm's failure should we report? (the one furthest along)
best_failure: Option<(Token, &'static str)>,
/// Span that replaces dummy error spans and that is used for the dummy result.
root_span: Span,
/// Set once an arm ended with `Error` or `ErrorReported`; holds the dummy result for that case.
result: Option<Box<dyn MacResult + 'cx>>,
}
impl<'a, 'cx, 'matcher> Tracker<'matcher> for CollectTrackerAndEmitter<'a, 'cx> {
    /// Nothing is collected per matcher location at the moment.
    fn before_match_loc(&mut self, _parser: &TtParser, _matcher: &'matcher MatcherLoc) {
        // Deliberately left without a body.
    }

    /// Records the outcome of one arm that did not match.
    ///
    /// * `Failure`: remembered as the best failure unless an already recorded failure
    ///   starts at the same position or further along.
    /// * `Error`: the error is emitted here and a dummy result is stored.
    /// * `ErrorReported`: a dummy result is stored.
    ///
    /// Must not be called with `Success`.
    fn after_arm(&mut self, result: &NamedParseResult) {
        match result {
            Success(_) => {
                unreachable!("should not collect detailed info for successful macro match");
            }
            Failure(token, msg) => {
                // The recorded failure stays if it is at least as far along as this one.
                let keep_recorded = matches!(
                    &self.best_failure,
                    Some((best_token, _)) if best_token.span.lo() >= token.span.lo()
                );
                if !keep_recorded {
                    self.best_failure = Some((token.clone(), msg));
                }
            }
            Error(err_sp, msg) => {
                let span = err_sp.substitute_dummy(self.root_span);
                self.cx.struct_span_err(span, msg).emit();
                self.result = Some(DummyResult::any(span));
            }
            ErrorReported(_) => {
                self.result = Some(DummyResult::any(self.root_span));
            }
        }
    }

    /// Name reported for tracing.
    fn description() -> &'static str {
        "detailed"
    }
}
impl<'a, 'cx> CollectTrackerAndEmitter<'a, 'cx> {
fn new(cx: &'a mut ExtCtxt<'cx>, root_span: Span) -> Self {
Self { cx, best_failure: None, root_span, result: None }
}
}
/// Tells the caller of a failed matching attempt whether matching may be attempted again.
enum CanRetry {
/// No error has been emitted yet, so matching may be retried.
Yes,
/// We are not allowed to retry macro expansion as a fatal error has been emitted already.
No(ErrorGuaranteed),
}
/// Try expanding the macro by matching `arg` against each arm in `lhses`, in order.
///
/// Returns the index of the first successful arm and its named matches. If no arm matched,
/// returns `Err(CanRetry::Yes)` when no error has been emitted yet (either every arm failed
/// to match or an arm produced an unreported `Error`), and `Err(CanRetry::No(_))` when an
/// error has already been reported. On failure, it's the caller's job to use `track`
/// accordingly to record all errors correctly.
///
/// `track.after_arm` is called with the result of every arm that was tried.
#[instrument(level = "debug", skip(sess, arg, lhses, track), fields(tracking = %T::description()))]
fn try_match_macro<'matcher, T: Tracker<'matcher>>(
    sess: &ParseSess,
    name: Ident,
    arg: &TokenStream,
    lhses: &'matcher [Vec<MatcherLoc>],
    track: &mut T,
) -> Result<(usize, NamedMatches), CanRetry> {
    // We create a base parser that can be used for the "black box" parts.
    // Every iteration needs a fresh copy of that parser. However, the parser
    // is not mutated on many of the iterations, particularly when dealing with
    // macros like this:
    //
    // macro_rules! foo {
    //     ("a") => (A);
    //     ("b") => (B);
    //     ("c") => (C);
    //     // ... etc. (maybe hundreds more)
    // }
    //
    // as seen in the `html5ever` benchmark. We use a `Cow` so that the base
    // parser is only cloned when necessary (upon mutation). Furthermore, we
    // reinitialize the `Cow` with the base parser at the start of every
    // iteration, so that any mutated parsers are not reused. This is all quite
    // hacky, but speeds up the `html5ever` benchmark significantly. (Issue
    // 68836 suggests a more comprehensive but more complex change to deal with
    // this situation.)
    // FIXME(Nilstrieb): Stop recovery from happening on this parser and retry later with recovery if the macro failed to match.
    let parser = parser_from_cx(sess, arg.clone());
    // Try each arm's matchers.
    let mut tt_parser = TtParser::new(name);
    for (i, lhs) in lhses.iter().enumerate() {
        // The span has to be entered, otherwise the events emitted while matching this arm
        // are not attributed to it. The guard exits the span at the end of the iteration
        // (or on an early return).
        let _tracing_span = trace_span!("Matching arm", %i).entered();

        // Take a snapshot of the state of pre-expansion gating at this point.
        // This is used so that if a matcher is not `Success(..)`ful,
        // then the spans which became gated when parsing the unsuccessful matcher
        // are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
        // NOTE(review): the `Error`/`ErrorReported` early returns below skip the restore at
        // the end of the loop body.
        let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());

        let result = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs, track);

        track.after_arm(&result);

        match result {
            Success(named_matches) => {
                debug!("Parsed arm successfully");
                // The matcher was `Success(..)`ful.
                // Merge the gated spans from parsing the matcher with the pre-existing ones.
                sess.gated_spans.merge(gated_spans_snapshot);

                return Ok((i, named_matches));
            }
            Failure(_, _) => {
                trace!("Failed to match arm, trying the next one");
                // Try the next arm.
            }
            Error(_, _) => {
                debug!("Fatal error occurred during matching");
                // We haven't emitted an error yet, so we can retry.
                return Err(CanRetry::Yes);
            }
            ErrorReported(guarantee) => {
                debug!("Fatal error occurred and was reported during matching");
                // An error has been reported already, we cannot retry as that would cause duplicate errors.
                return Err(CanRetry::No(guarantee));
            }
        }

        // The matcher was not `Success(..)`ful.
        // Restore to the state before snapshotting and maybe try again.
        mem::swap(&mut gated_spans_snapshot, &mut sess.gated_spans.spans.borrow_mut());
    }

    Err(CanRetry::Yes)
}
// Note that macro-by-example's input is also matched against a token tree:
// $( $lhs:tt => $rhs:tt );+
//
@ -452,7 +586,8 @@ pub fn compile_declarative_macro(
let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
let mut tt_parser =
TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
let argument_map =
match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
Success(m) => m,
Failure(token, msg) => {
let s = parse_failure_msg(&token);
@ -470,7 +605,7 @@ pub fn compile_declarative_macro(
.emit();
return dummy_syn_ext();
}
ErrorReported => {
ErrorReported(_) => {
return dummy_syn_ext();
}
};