Rollup merge of #127233 - nnethercote:parser-cleanups, r=petrochenkov

Some parser cleanups

Cleanups I made while looking closely at this code.

r? ``@petrochenkov``
Matthias Krüger 2024-07-03 17:26:55 +02:00 committed by GitHub
commit 7fdb2f5cab
8 changed files with 97 additions and 119 deletions

View File

@@ -204,12 +204,14 @@ impl Attribute {
     pub fn tokens(&self) -> TokenStream {
         match &self.kind {
-            AttrKind::Normal(normal) => normal
-                .tokens
-                .as_ref()
-                .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}"))
-                .to_attr_token_stream()
-                .to_tokenstream(),
+            AttrKind::Normal(normal) => TokenStream::new(
+                normal
+                    .tokens
+                    .as_ref()
+                    .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}"))
+                    .to_attr_token_stream()
+                    .to_token_trees(),
+            ),
             &AttrKind::DocComment(comment_kind, data) => TokenStream::token_alone(
                 token::DocComment(comment_kind, self.style, data),
                 self.span,

View File

@@ -23,7 +23,6 @@ use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::{Decodable, Encodable, HashStable_Generic};
 use rustc_serialize::{Decodable, Encodable};
 use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};
-use smallvec::{smallvec, SmallVec};
 use std::borrow::Cow;
 use std::{cmp, fmt, iter};
@@ -180,27 +179,25 @@ impl AttrTokenStream {
         AttrTokenStream(Lrc::new(tokens))
     }
 
-    /// Converts this `AttrTokenStream` to a plain `TokenStream`.
+    /// Converts this `AttrTokenStream` to a plain `Vec<TokenTree>`.
     /// During conversion, `AttrTokenTree::Attributes` get 'flattened'
     /// back to a `TokenStream` of the form `outer_attr attr_target`.
     /// If there are inner attributes, they are inserted into the proper
     /// place in the attribute target tokens.
-    pub fn to_tokenstream(&self) -> TokenStream {
-        let trees: Vec<_> = self
-            .0
-            .iter()
-            .flat_map(|tree| match &tree {
+    pub fn to_token_trees(&self) -> Vec<TokenTree> {
+        let mut res = Vec::with_capacity(self.0.len());
+        for tree in self.0.iter() {
+            match tree {
                 AttrTokenTree::Token(inner, spacing) => {
-                    smallvec![TokenTree::Token(inner.clone(), *spacing)].into_iter()
+                    res.push(TokenTree::Token(inner.clone(), *spacing));
                 }
                 AttrTokenTree::Delimited(span, spacing, delim, stream) => {
-                    smallvec![TokenTree::Delimited(
+                    res.push(TokenTree::Delimited(
                         *span,
                         *spacing,
                         *delim,
-                        stream.to_tokenstream()
-                    ),]
-                    .into_iter()
+                        TokenStream::new(stream.to_token_trees()),
+                    ))
                 }
                 AttrTokenTree::Attributes(data) => {
                     let idx = data
@@ -208,14 +205,7 @@ impl AttrTokenStream {
                         .partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer));
                     let (outer_attrs, inner_attrs) = data.attrs.split_at(idx);
 
-                    let mut target_tokens: Vec<_> = data
-                        .tokens
-                        .to_attr_token_stream()
-                        .to_tokenstream()
-                        .0
-                        .iter()
-                        .cloned()
-                        .collect();
+                    let mut target_tokens = data.tokens.to_attr_token_stream().to_token_trees();
                     if !inner_attrs.is_empty() {
                         let mut found = false;
                         // Check the last two trees (to account for a trailing semi)
@@ -251,17 +241,14 @@ impl AttrTokenStream {
                             "Failed to find trailing delimited group in: {target_tokens:?}"
                         );
                     }
-                    let mut flat: SmallVec<[_; 1]> =
-                        SmallVec::with_capacity(target_tokens.len() + outer_attrs.len());
                     for attr in outer_attrs {
-                        flat.extend(attr.tokens().0.iter().cloned());
+                        res.extend(attr.tokens().0.iter().cloned());
                     }
-                    flat.extend(target_tokens);
-                    flat.into_iter()
+                    res.extend(target_tokens);
                 }
-            })
-            .collect();
-        TokenStream::new(trees)
+            }
+        }
+        res
     }
 }
@@ -409,8 +396,8 @@ impl PartialEq<TokenStream> for TokenStream {
 }
 
 impl TokenStream {
-    pub fn new(streams: Vec<TokenTree>) -> TokenStream {
-        TokenStream(Lrc::new(streams))
+    pub fn new(tts: Vec<TokenTree>) -> TokenStream {
+        TokenStream(Lrc::new(tts))
     }
 
     pub fn is_empty(&self) -> bool {
@@ -461,7 +448,7 @@ impl TokenStream {
                 AttributesData { attrs: attrs.iter().cloned().collect(), tokens: tokens.clone() };
             AttrTokenStream::new(vec![AttrTokenTree::Attributes(attr_data)])
         };
-        attr_stream.to_tokenstream()
+        TokenStream::new(attr_stream.to_token_trees())
     }
 
     pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
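A note on the flattening rewrite above: `to_token_trees` relies on the attribute list storing all outer attributes before all inner ones, which is what lets `partition_point` find the split with a binary search rather than a scan. A minimal sketch of that split, using simplified stand-in types rather than rustc's real `Attribute`:

    // Simplified stand-ins for rustc_ast's Attribute/AttrStyle.
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum AttrStyle {
        Outer,
        Inner,
    }

    #[derive(Debug)]
    struct Attr {
        style: AttrStyle,
        name: &'static str,
    }

    fn main() {
        // Outer attributes are stored before inner ones, so
        // `partition_point` finds the boundary in O(log n).
        let attrs = vec![
            Attr { style: AttrStyle::Outer, name: "derive" },
            Attr { style: AttrStyle::Outer, name: "cfg" },
            Attr { style: AttrStyle::Inner, name: "doc" },
        ];
        let idx = attrs.partition_point(|attr| matches!(attr.style, AttrStyle::Outer));
        let (outer_attrs, inner_attrs) = attrs.split_at(idx);
        assert_eq!((outer_attrs.len(), inner_attrs.len()), (2, 1));
        println!("outer: {outer_attrs:?}, inner: {inner_attrs:?}");
    }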

View File

@@ -38,16 +38,14 @@ pub(crate) fn cfg_eval(
     lint_node_id: NodeId,
 ) -> Annotatable {
     let features = Some(features);
-    CfgEval { cfg: &mut StripUnconfigured { sess, features, config_tokens: true, lint_node_id } }
+    CfgEval(StripUnconfigured { sess, features, config_tokens: true, lint_node_id })
         .configure_annotatable(annotatable)
         // Since the item itself has already been configured by the `InvocationCollector`,
         // we know that fold result vector will contain exactly one element.
         .unwrap()
 }
 
-struct CfgEval<'a, 'b> {
-    cfg: &'a mut StripUnconfigured<'b>,
-}
+struct CfgEval<'a>(StripUnconfigured<'a>);
 
 fn flat_map_annotatable(
     vis: &mut impl MutVisitor,
@@ -125,9 +123,9 @@ fn has_cfg_or_cfg_attr(annotatable: &Annotatable) -> bool {
     res.is_break()
 }
 
-impl CfgEval<'_, '_> {
+impl CfgEval<'_> {
     fn configure<T: HasAttrs + HasTokens>(&mut self, node: T) -> Option<T> {
-        self.cfg.configure(node)
+        self.0.configure(node)
     }
 
     fn configure_annotatable(&mut self, mut annotatable: Annotatable) -> Option<Annotatable> {
@@ -196,7 +194,7 @@ impl CfgEval<'_, '_> {
         // Re-parse the tokens, setting the `capture_cfg` flag to save extra information
         // to the captured `AttrTokenStream` (specifically, we capture
         // `AttrTokenTree::AttributesData` for all occurrences of `#[cfg]` and `#[cfg_attr]`)
-        let mut parser = Parser::new(&self.cfg.sess.psess, orig_tokens, None);
+        let mut parser = Parser::new(&self.0.sess.psess, orig_tokens, None);
         parser.capture_cfg = true;
         match parse_annotatable_with(&mut parser) {
             Ok(a) => annotatable = a,
@@ -212,16 +210,16 @@ impl CfgEval<'_, '_> {
     }
 }
 
-impl MutVisitor for CfgEval<'_, '_> {
+impl MutVisitor for CfgEval<'_> {
     #[instrument(level = "trace", skip(self))]
     fn visit_expr(&mut self, expr: &mut P<ast::Expr>) {
-        self.cfg.configure_expr(expr, false);
+        self.0.configure_expr(expr, false);
         mut_visit::noop_visit_expr(expr, self);
     }
 
     #[instrument(level = "trace", skip(self))]
     fn visit_method_receiver_expr(&mut self, expr: &mut P<ast::Expr>) {
-        self.cfg.configure_expr(expr, true);
+        self.0.configure_expr(expr, true);
         mut_visit::noop_visit_expr(expr, self);
     }

View File

@@ -120,21 +120,21 @@ struct CollectTrackerAndEmitter<'a, 'cx, 'matcher> {
 struct BestFailure {
     token: Token,
-    position_in_tokenstream: usize,
+    position_in_tokenstream: u32,
     msg: &'static str,
     remaining_matcher: MatcherLoc,
 }
 
 impl BestFailure {
-    fn is_better_position(&self, position: usize) -> bool {
+    fn is_better_position(&self, position: u32) -> bool {
         position > self.position_in_tokenstream
     }
 }
 
 impl<'a, 'cx, 'matcher> Tracker<'matcher> for CollectTrackerAndEmitter<'a, 'cx, 'matcher> {
-    type Failure = (Token, usize, &'static str);
+    type Failure = (Token, u32, &'static str);
 
-    fn build_failure(tok: Token, position: usize, msg: &'static str) -> Self::Failure {
+    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
         (tok, position, msg)
     }
@@ -211,9 +211,9 @@ impl<'matcher> FailureForwarder<'matcher> {
 }
 
 impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> {
-    type Failure = (Token, usize, &'static str);
+    type Failure = (Token, u32, &'static str);
 
-    fn build_failure(tok: Token, position: usize, msg: &'static str) -> Self::Failure {
+    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
         (tok, position, msg)
     }

View File

@@ -452,7 +452,7 @@ impl TtParser {
         &mut self,
         matcher: &'matcher [MatcherLoc],
         token: &Token,
-        approx_position: usize,
+        approx_position: u32,
         track: &mut T,
     ) -> Option<NamedParseResult<T::Failure>> {
         // Matcher positions that would be valid if the macro invocation was over now. Only

View File

@@ -153,7 +153,7 @@ pub(super) trait Tracker<'matcher> {
     /// Arm failed to match. If the token is `token::Eof`, it indicates an unexpected
     /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
     /// The usize is the approximate position of the token in the input token stream.
-    fn build_failure(tok: Token, position: usize, msg: &'static str) -> Self::Failure;
+    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure;
 
     /// This is called before trying to match next MatcherLoc on the current token.
     fn before_match_loc(&mut self, _parser: &TtParser, _matcher: &'matcher MatcherLoc) {}
@@ -182,7 +182,7 @@ pub(super) struct NoopTracker;
 impl<'matcher> Tracker<'matcher> for NoopTracker {
     type Failure = ();
 
-    fn build_failure(_tok: Token, _position: usize, _msg: &'static str) -> Self::Failure {}
+    fn build_failure(_tok: Token, _position: u32, _msg: &'static str) -> Self::Failure {}
 
     fn description() -> &'static str {
         "none"

View File

@@ -9,6 +9,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::{sym, Span, DUMMY_SP};
 use std::ops::Range;
+use std::{iter, mem};
 
 /// A wrapper type to ensure that the parser handles outer attributes correctly.
 /// When we parse outer attributes, we need to ensure that we capture tokens
@@ -29,15 +30,15 @@ pub struct AttrWrapper {
     // The start of the outer attributes in the token cursor.
     // This allows us to create a `ReplaceRange` for the entire attribute
     // target, including outer attributes.
-    start_pos: usize,
+    start_pos: u32,
 }
 
 impl AttrWrapper {
-    pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper {
+    pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
         AttrWrapper { attrs, start_pos }
     }
 
     pub fn empty() -> AttrWrapper {
-        AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
+        AttrWrapper { attrs: AttrVec::new(), start_pos: u32::MAX }
     }
 
     pub(crate) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
@@ -53,7 +54,7 @@ impl AttrWrapper {
     // FIXME: require passing an NT to prevent misuse of this method
     pub(crate) fn prepend_to_nt_inner(self, attrs: &mut AttrVec) {
         let mut self_attrs = self.attrs;
-        std::mem::swap(attrs, &mut self_attrs);
+        mem::swap(attrs, &mut self_attrs);
         attrs.extend(self_attrs);
     }
@@ -91,7 +92,7 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
 struct LazyAttrTokenStreamImpl {
     start_token: (Token, Spacing),
     cursor_snapshot: TokenCursor,
-    num_calls: usize,
+    num_calls: u32,
     break_last_token: bool,
     replace_ranges: Box<[ReplaceRange]>,
 }
@@ -104,15 +105,16 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
         // produce an empty `TokenStream` if no calls were made, and omit the
         // final token otherwise.
         let mut cursor_snapshot = self.cursor_snapshot.clone();
-        let tokens =
-            std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
-                .chain(std::iter::repeat_with(|| {
-                    let token = cursor_snapshot.next();
-                    (FlatToken::Token(token.0), token.1)
-                }))
-                .take(self.num_calls);
+        let tokens = iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
+            .chain(iter::repeat_with(|| {
+                let token = cursor_snapshot.next();
+                (FlatToken::Token(token.0), token.1)
+            }))
+            .take(self.num_calls as usize);
 
-        if !self.replace_ranges.is_empty() {
+        if self.replace_ranges.is_empty() {
+            make_attr_token_stream(tokens, self.break_last_token)
+        } else {
             let mut tokens: Vec<_> = tokens.collect();
             let mut replace_ranges = self.replace_ranges.to_vec();
             replace_ranges.sort_by_key(|(range, _)| range.start);
@@ -156,7 +158,7 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
                 // This keeps the total length of `tokens` constant throughout the
                 // replacement process, allowing us to use all of the `ReplaceRanges` entries
                 // without adjusting indices.
-                let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone))
+                let filler = iter::repeat((FlatToken::Empty, Spacing::Alone))
                     .take(range.len() - new_tokens.len());
 
                 tokens.splice(
@@ -164,9 +166,7 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
                     new_tokens.into_iter().chain(filler),
                 );
             }
-            make_token_stream(tokens.into_iter(), self.break_last_token)
-        } else {
-            make_token_stream(tokens, self.break_last_token)
+            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
         }
     }
 }
@@ -218,24 +218,23 @@ impl<'a> Parser<'a> {
         let start_token = (self.token.clone(), self.token_spacing);
         let cursor_snapshot = self.token_cursor.clone();
         let start_pos = self.num_bump_calls;
         let has_outer_attrs = !attrs.attrs.is_empty();
-        let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
         let replace_ranges_start = self.capture_state.replace_ranges.len();
-        let ret = f(self, attrs.attrs);
-        self.capture_state.capturing = prev_capturing;
-        let (mut ret, trailing) = ret?;
+        let (mut ret, trailing) = {
+            let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
+            let ret_and_trailing = f(self, attrs.attrs);
+            self.capture_state.capturing = prev_capturing;
+            ret_and_trailing?
+        };
 
         // When we're not in `capture-cfg` mode, then bail out early if:
         // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
         //    so there's nothing for us to do.
         // 2. Our target already has tokens set (e.g. we've parsed something
-        //    like `#[my_attr] $item`. The actual parsing code takes care of prepending
-        //    any attributes to the nonterminal, so we don't need to modify the
-        //    already captured tokens.
+        //    like `#[my_attr] $item`). The actual parsing code takes care of
+        //    prepending any attributes to the nonterminal, so we don't need to
+        //    modify the already captured tokens.
         // Note that this check is independent of `force_collect`- if we already
         // have tokens, or can't even store them, then there's never a need to
         // force collection of new tokens.
@@ -276,37 +275,32 @@ impl<'a> Parser<'a> {
         let replace_ranges_end = self.capture_state.replace_ranges.len();
 
-        let mut end_pos = self.num_bump_calls;
-        let mut captured_trailing = false;
         // Capture a trailing token if requested by the callback 'f'
-        match trailing {
-            TrailingToken::None => {}
+        let captured_trailing = match trailing {
+            TrailingToken::None => false,
             TrailingToken::Gt => {
                 assert_eq!(self.token.kind, token::Gt);
+                false
             }
             TrailingToken::Semi => {
                 assert_eq!(self.token.kind, token::Semi);
-                end_pos += 1;
-                captured_trailing = true;
+                true
            }
-            TrailingToken::MaybeComma => {
-                if self.token.kind == token::Comma {
-                    end_pos += 1;
-                    captured_trailing = true;
-                }
-            }
-        }
+            TrailingToken::MaybeComma => self.token.kind == token::Comma,
+        };
 
-        // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
-        // then extend the range of captured tokens to include it, since the parser
-        // was not actually bumped past it. When the `LazyAttrTokenStream` gets converted
-        // into an `AttrTokenStream`, we will create the proper token.
-        if self.break_last_token {
-            assert!(!captured_trailing, "Cannot set break_last_token and have trailing token");
-            end_pos += 1;
-        }
+        assert!(
+            !(self.break_last_token && captured_trailing),
+            "Cannot set break_last_token and have trailing token"
+        );
+
+        let end_pos = self.num_bump_calls
+            + captured_trailing as u32
+            // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens), then
+            // extend the range of captured tokens to include it, since the parser was not actually
+            // bumped past it. When the `LazyAttrTokenStream` gets converted into an
+            // `AttrTokenStream`, we will create the proper token.
+            + self.break_last_token as u32;
 
         let num_calls = end_pos - start_pos;
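The rewritten end-position computation above folds the old `end_pos += 1` branches into bool-to-integer casts, which Rust guarantees to be 1 for `true` and 0 for `false`. A minimal check of the arithmetic (illustrative values only):

    fn main() {
        let num_bump_calls: u32 = 10;
        let captured_trailing = true;
        let break_last_token = false;
        // `true as u32 == 1` and `false as u32 == 0`, so the two flags
        // extend the captured range by zero, one, or two positions.
        let end_pos = num_bump_calls + captured_trailing as u32 + break_last_token as u32;
        assert_eq!(end_pos, 11);
    }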
@@ -318,14 +312,11 @@ impl<'a> Parser<'a> {
             // Grab any replace ranges that occur *inside* the current AST node.
             // We will perform the actual replacement when we convert the `LazyAttrTokenStream`
             // to an `AttrTokenStream`.
-            let start_calls: u32 = start_pos.try_into().unwrap();
             self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
                 .iter()
                 .cloned()
                 .chain(inner_attr_replace_ranges.iter().cloned())
-                .map(|(range, tokens)| {
-                    ((range.start - start_calls)..(range.end - start_calls), tokens)
-                })
+                .map(|(range, tokens)| ((range.start - start_pos)..(range.end - start_pos), tokens))
                 .collect()
         };
@@ -340,7 +331,7 @@ impl<'a> Parser<'a> {
         // If we support tokens at all
         if let Some(target_tokens) = ret.tokens_mut() {
             if target_tokens.is_none() {
-                // Store se our newly captured tokens into the AST node
+                // Store our newly captured tokens into the AST node.
                 *target_tokens = Some(tokens.clone());
             }
         }
@@ -382,10 +373,10 @@ impl<'a> Parser<'a> {
     }
 }
 
-/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
-/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
-/// of open and close delims.
-fn make_token_stream(
+/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
+/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
+/// close delims.
+fn make_attr_token_stream(
     mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
     break_last_token: bool,
 ) -> AttrTokenStream {
@@ -464,6 +455,6 @@ mod size_asserts {
     use rustc_data_structures::static_assert_size;
     // tidy-alphabetical-start
     static_assert_size!(AttrWrapper, 16);
-    static_assert_size!(LazyAttrTokenStreamImpl, 104);
+    static_assert_size!(LazyAttrTokenStreamImpl, 96);
     // tidy-alphabetical-end
 }
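One detail behind the `ReplaceRange` machinery above: replacements are padded with `FlatToken::Empty` filler so the token vector's length never changes, keeping the indices of every later range valid. A tiny runnable sketch of that length-preserving splice (illustrative names, not rustc's):

    fn main() {
        let mut tokens = vec!["a", "b", "c", "d", "e"];
        let range = 1..4; // replace "b", "c", "d"
        let new_tokens = vec!["X"];

        // Pad the replacement to the same length as the range it covers.
        let filler = std::iter::repeat("<empty>").take(range.len() - new_tokens.len());
        tokens.splice(range, new_tokens.into_iter().chain(filler));

        // Length is unchanged, so later ranges still point at the right spots.
        assert_eq!(tokens, ["a", "X", "<empty>", "<empty>", "e"]);
    }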

View File

@@ -153,7 +153,7 @@ pub struct Parser<'a> {
     expected_tokens: Vec<TokenType>,
     token_cursor: TokenCursor,
     // The number of calls to `bump`, i.e. the position in the token stream.
-    num_bump_calls: usize,
+    num_bump_calls: u32,
     // During parsing we may sometimes need to 'unglue' a glued token into two
     // component tokens (e.g. '>>' into '>' and '>'), so the parser can consume
     // them one at a time. This process bypasses the normal capturing mechanism
@@ -192,7 +192,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(target_pointer_width = "64")]
-rustc_data_structures::static_assert_size!(Parser<'_>, 264);
+rustc_data_structures::static_assert_size!(Parser<'_>, 256);
 
 /// Stores span information about a closure.
 #[derive(Clone, Debug)]
@@ -1572,7 +1572,7 @@ impl<'a> Parser<'a> {
         self.expected_tokens.clear();
     }
 
-    pub fn approx_token_stream_pos(&self) -> usize {
+    pub fn approx_token_stream_pos(&self) -> u32 {
        self.num_bump_calls
     }
 }
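For context on the size assertions updated above (`Parser` from 264 to 256 bytes after narrowing `num_bump_calls`): `static_assert_size!` pins a type's size at compile time by equating two array lengths, so any accidental change fails the build until the number is updated. A sketch of the underlying trick (an illustrative reimplementation, not necessarily rustc's exact macro):

    // If the two lengths disagree, the constant fails to type-check.
    macro_rules! static_assert_size {
        ($ty:ty, $size:expr) => {
            const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
        };
    }

    #[allow(dead_code)]
    struct Example {
        a: u64,
        b: u32, // e.g. a former usize field narrowed to u32
    }

    static_assert_size!(Example, 16); // 12 bytes of fields, padded to 16

    fn main() {
        println!("Example is {} bytes", std::mem::size_of::<Example>());
    }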