From 1fdabfbebbb12b2836f91aeec3157fd092f9a8ad Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 21 Aug 2024 14:16:42 +1000 Subject: [PATCH] Avoid double-handling of attributes in `collect_tokens`. By keeping track of attributes that have been previously processed. This fixes the `macro-rules-derive-cfg.stdout` test, and is necessary for #124141 which removes nonterminals. Also shrink the `SmallVec` inline size used in `IntervalSet`. 2 gives slightly better perf than 4 now that there's an `IntervalSet` in `Parser`, which is cloned reasonably often. --- Cargo.lock | 1 + compiler/rustc_index/src/interval.rs | 2 +- compiler/rustc_parse/Cargo.toml | 1 + .../rustc_parse/src/parser/attr_wrapper.rs | 23 +++++++++++--- compiler/rustc_parse/src/parser/mod.rs | 7 ++++- .../proc-macro/macro-rules-derive-cfg.stdout | 31 ++----------------- 6 files changed, 30 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a18219b5683..4ce6029afe8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4172,6 +4172,7 @@ dependencies = [ "rustc_errors", "rustc_feature", "rustc_fluent_macro", + "rustc_index", "rustc_lexer", "rustc_macros", "rustc_session", diff --git a/compiler/rustc_index/src/interval.rs b/compiler/rustc_index/src/interval.rs index be028feca60..503470f896d 100644 --- a/compiler/rustc_index/src/interval.rs +++ b/compiler/rustc_index/src/interval.rs @@ -18,7 +18,7 @@ mod tests; #[derive(Debug, Clone)] pub struct IntervalSet { // Start, end - map: SmallVec<[(u32, u32); 4]>, + map: SmallVec<[(u32, u32); 2]>, domain: usize, _data: PhantomData, } diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index b33896154f2..c59ae48a07d 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -12,6 +12,7 @@ rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } rustc_feature = { path = "../rustc_feature" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } +rustc_index = { path = "../rustc_index" } rustc_lexer = { path = "../rustc_lexer" } rustc_macros = { path = "../rustc_macros" } rustc_session = { path = "../rustc_session" } diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index a74c87ca2a7..b2abe5464b4 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -256,6 +256,20 @@ impl<'a> Parser<'a> { res? }; + // Ignore any attributes we've previously processed. This happens when + // an inner call to `collect_tokens` returns an AST node and then an + // outer call ends up with the same AST node without any additional + // wrapping layer. + let ret_attrs: AttrVec = ret + .attrs() + .iter() + .cloned() + .filter(|attr| { + let is_unseen = self.capture_state.seen_attrs.insert(attr.id); + is_unseen + }) + .collect(); + // When we're not in "definite capture mode", then skip collecting and // return early if either of the following conditions hold. // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`). @@ -269,7 +283,7 @@ impl<'a> Parser<'a> { // tokens. let definite_capture_mode = self.capture_cfg && matches!(self.capture_state.capturing, Capturing::Yes) - && has_cfg_or_cfg_attr(ret.attrs()); + && has_cfg_or_cfg_attr(&ret_attrs); if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) { return Ok(ret); } @@ -289,7 +303,7 @@ impl<'a> Parser<'a> { // outer and inner attributes. So this check is more precise than // the earlier `needs_tokens` check, and we don't need to // check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.) - || needs_tokens(ret.attrs()) + || needs_tokens(&ret_attrs) // - We are in "definite capture mode", which requires that there // are `#[cfg]` or `#[cfg_attr]` attributes. (During normal // non-`capture_cfg` parsing, we don't need any special capturing @@ -328,7 +342,7 @@ impl<'a> Parser<'a> { // `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`, // which means the relevant tokens will be removed. (More details below.) let mut inner_attr_parser_replacements = Vec::new(); - for attr in ret.attrs() { + for attr in ret_attrs.iter() { if attr.style == ast::AttrStyle::Inner { if let Some(inner_attr_parser_range) = self.capture_state.inner_attr_parser_ranges.remove(&attr.id) @@ -418,7 +432,7 @@ impl<'a> Parser<'a> { // cfg-expand this AST node. let start_pos = if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos }; - let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens }; + let target = AttrsTarget { attrs: ret_attrs, tokens }; tokens_used = true; self.capture_state .parser_replacements @@ -428,6 +442,7 @@ impl<'a> Parser<'a> { // the outermost call to this method. self.capture_state.parser_replacements.clear(); self.capture_state.inner_attr_parser_ranges.clear(); + self.capture_state.seen_attrs.clear(); } assert!(tokens_used); // check we didn't create `tokens` unnecessarily Ok(ret) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index b72957ace52..3ffc5fa4011 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -35,6 +35,7 @@ use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult}; +use rustc_index::interval::IntervalSet; use rustc_session::parse::ParseSess; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::{Span, DUMMY_SP}; @@ -183,7 +184,7 @@ pub struct Parser<'a> { // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure // it doesn't unintentionally get bigger. #[cfg(target_pointer_width = "64")] -rustc_data_structures::static_assert_size!(Parser<'_>, 256); +rustc_data_structures::static_assert_size!(Parser<'_>, 288); /// Stores span information about a closure. #[derive(Clone, Debug)] @@ -261,6 +262,9 @@ struct CaptureState { capturing: Capturing, parser_replacements: Vec, inner_attr_parser_ranges: FxHashMap, + // `IntervalSet` is good for perf because attrs are mostly added to this + // set in contiguous ranges. + seen_attrs: IntervalSet, } /// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that @@ -458,6 +462,7 @@ impl<'a> Parser<'a> { capturing: Capturing::No, parser_replacements: Vec::new(), inner_attr_parser_ranges: Default::default(), + seen_attrs: IntervalSet::new(u32::MAX as usize), }, current_closure: None, recovery: Recovery::Allowed, diff --git a/tests/ui/proc-macro/macro-rules-derive-cfg.stdout b/tests/ui/proc-macro/macro-rules-derive-cfg.stdout index 5205a4adf25..c1e46b50d40 100644 --- a/tests/ui/proc-macro/macro-rules-derive-cfg.stdout +++ b/tests/ui/proc-macro/macro-rules-derive-cfg.stdout @@ -1,11 +1,9 @@ PRINT-DERIVE INPUT (DISPLAY): struct Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)] -{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] #[rustc_dummy(fourth)] 30 }]); +{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]); PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): struct Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)] -{ - #! [rustc_dummy(third)] #[rustc_dummy(fourth)] #[rustc_dummy(fourth)] 30 -}]); +{ #! [rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]); PRINT-DERIVE INPUT (DEBUG): TokenStream [ Ident { ident: "struct", @@ -138,31 +136,6 @@ PRINT-DERIVE INPUT (DEBUG): TokenStream [ ], span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0), }, - Punct { - ch: '#', - spacing: Alone, - span: $DIR/macro-rules-derive-cfg.rs:25:5: 25:6 (#0), - }, - Group { - delimiter: Bracket, - stream: TokenStream [ - Ident { - ident: "rustc_dummy", - span: $DIR/macro-rules-derive-cfg.rs:25:28: 25:39 (#0), - }, - Group { - delimiter: Parenthesis, - stream: TokenStream [ - Ident { - ident: "fourth", - span: $DIR/macro-rules-derive-cfg.rs:25:40: 25:46 (#0), - }, - ], - span: $DIR/macro-rules-derive-cfg.rs:25:39: 25:47 (#0), - }, - ], - span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0), - }, Literal { kind: Integer, symbol: "30",