From ad566b78f21d58c2a25feeba06fc9506fe514b46 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Apr 2022 14:15:30 +1000 Subject: [PATCH 01/20] Tweak `Cursor::next_with_spacing`. This makes it more like `CursorRef::next_with_spacing`. There is no performance effect, just a consistency improvement. --- compiler/rustc_ast/src/tokenstream.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index affb4289cb1..a4057043e48 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -586,12 +586,10 @@ impl Cursor { } pub fn next_with_spacing(&mut self) -> Option { - if self.index < self.stream.len() { + self.stream.0.get(self.index).map(|tree| { self.index += 1; - Some(self.stream.0[self.index - 1].clone()) - } else { - None - } + tree.clone() + }) } pub fn index(&self) -> usize { From aefbbeec34e82ed6ccfbb94d8689c134cd8ae9a9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 13 Apr 2022 16:18:01 +1000 Subject: [PATCH 02/20] Inline and remove `TokenTree::{open_tt,close_tt}`. They both have a single call site. --- compiler/rustc_ast/src/tokenstream.rs | 10 ---------- compiler/rustc_parse/src/parser/mod.rs | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index a4057043e48..857dc307c98 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -94,16 +94,6 @@ impl TokenTree { TokenTree::Token(Token::new(kind, span)) } - /// Returns the opening delimiter as a token tree. - pub fn open_tt(span: DelimSpan, delim: DelimToken) -> TokenTree { - TokenTree::token(token::OpenDelim(delim), span.open) - } - - /// Returns the closing delimiter as a token tree. 
- pub fn close_tt(span: DelimSpan, delim: DelimToken) -> TokenTree { - TokenTree::token(token::CloseDelim(delim), span.close) - } - pub fn uninterpolate(self) -> TokenTree { match self { TokenTree::Token(token) => TokenTree::Token(token.uninterpolate().into_owned()), diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index cb6be8f412c..ad6a7b90bc9 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -266,12 +266,12 @@ impl TokenCursor { loop { let (tree, spacing) = if !self.frame.open_delim { self.frame.open_delim = true; - TokenTree::open_tt(self.frame.span, self.frame.delim).into() + TokenTree::token(token::OpenDelim(self.frame.delim), self.frame.span.open).into() } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() { tree } else if !self.frame.close_delim { self.frame.close_delim = true; - TokenTree::close_tt(self.frame.span, self.frame.delim).into() + TokenTree::token(token::CloseDelim(self.frame.delim), self.frame.span.close).into() } else if let Some(frame) = self.stack.pop() { self.frame = frame; continue; From 89ec75b0e95a62a2d1ac76f7918a469c7bb228ec Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 14 Apr 2022 11:13:20 +1000 Subject: [PATCH 03/20] Inline and remove `Parser::next_tok()`. It has a single call site. 
--- compiler/rustc_parse/src/parser/mod.rs | 58 ++++++++++++-------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index ad6a7b90bc9..378a533edf9 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -123,8 +123,8 @@ pub struct Parser<'a> { pub capture_cfg: bool, restrictions: Restrictions, expected_tokens: Vec, - // Important: This must only be advanced from `next_tok` - // to ensure that `token_cursor.num_next_calls` is updated properly + // Important: This must only be advanced from `bump` to ensure that + // `token_cursor.num_next_calls` is updated properly. token_cursor: TokenCursor, desugar_doc_comments: bool, /// This field is used to keep track of how many left angle brackets we have seen. This is @@ -476,33 +476,6 @@ impl<'a> Parser<'a> { parser } - #[inline] - fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) { - loop { - let (mut next, spacing) = if self.desugar_doc_comments { - self.token_cursor.inlined_next_desugared() - } else { - self.token_cursor.inlined_next() - }; - self.token_cursor.num_next_calls += 1; - // We've retrieved an token from the underlying - // cursor, so we no longer need to worry about - // an unglued token. See `break_and_eat` for more details - self.token_cursor.break_last_token = false; - if next.span.is_dummy() { - // Tweak the location for better diagnostics, but keep syntactic context intact. 
- next.span = fallback_span.with_ctxt(next.span.ctxt()); - } - if matches!( - next.kind, - token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) - ) { - continue; - } - return (next, spacing); - } - } - pub fn unexpected(&mut self) -> PResult<'a, T> { match self.expect_one_of(&[], &[]) { Err(e) => Err(e), @@ -697,7 +670,7 @@ impl<'a> Parser<'a> { // // If we consume any additional tokens, then this token // is not needed (we'll capture the entire 'glued' token), - // and `next_tok` will set this field to `None` + // and `bump` will set this field to `None` self.token_cursor.break_last_token = true; // Use the spacing of the glued token as the spacing // of the unglued second token. @@ -1035,8 +1008,29 @@ impl<'a> Parser<'a> { /// Advance the parser by one token. pub fn bump(&mut self) { - let next_token = self.next_tok(self.token.span); - self.inlined_bump_with(next_token); + let fallback_span = self.token.span; + loop { + let (mut next, spacing) = if self.desugar_doc_comments { + self.token_cursor.inlined_next_desugared() + } else { + self.token_cursor.inlined_next() + }; + self.token_cursor.num_next_calls += 1; + // We've retrieved an token from the underlying + // cursor, so we no longer need to worry about + // an unglued token. See `break_and_eat` for more details + self.token_cursor.break_last_token = false; + if next.span.is_dummy() { + // Tweak the location for better diagnostics, but keep syntactic context intact. + next.span = fallback_span.with_ctxt(next.span.ctxt()); + } + if !matches!( + next.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + ) { + return self.inlined_bump_with((next, spacing)); + } + } } /// Look-ahead `dist` tokens of `self.token` and get access to that token there. From b1e6dee59666d2f85a5121730ec128934519260f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Apr 2022 11:36:13 +1000 Subject: [PATCH 04/20] Merge `TokenCursor::{next,next_desugared}`. 
And likewise for the inlined variants. I did this for simplicity, but interestingly it was a performance win as well. --- .../rustc_parse/src/parser/attr_wrapper.rs | 17 +-- compiler/rustc_parse/src/parser/mod.rs | 141 ++++++++---------- 2 files changed, 72 insertions(+), 86 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 5ee9c339bb7..02749088c31 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -100,21 +100,16 @@ rustc_data_structures::static_assert_size!(LazyTokenStreamImpl, 144); impl CreateTokenStream for LazyTokenStreamImpl { fn create_token_stream(&self) -> AttrAnnotatedTokenStream { - // The token produced by the final call to `{,inlined_}next` or - // `{,inlined_}next_desugared` was not actually consumed by the - // callback. The combination of chaining the initial token and using - // `take` produces the desired result - we produce an empty - // `TokenStream` if no calls were made, and omit the final token - // otherwise. + // The token produced by the final call to `{,inlined_}next` was not + // actually consumed by the callback. The combination of chaining the + // initial token and using `take` produces the desired result - we + // produce an empty `TokenStream` if no calls were made, and omit the + // final token otherwise. 
let mut cursor_snapshot = self.cursor_snapshot.clone(); let tokens = std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1)) .chain((0..self.num_calls).map(|_| { - let token = if cursor_snapshot.desugar_doc_comments { - cursor_snapshot.next_desugared() - } else { - cursor_snapshot.next() - }; + let token = cursor_snapshot.next(cursor_snapshot.desugar_doc_comments); (FlatToken::Token(token.0), token.1) })) .take(self.num_calls); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 378a533edf9..b6f4cd119e0 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -206,9 +206,7 @@ struct TokenCursor { frame: TokenCursorFrame, stack: Vec, desugar_doc_comments: bool, - // Counts the number of calls to `{,inlined_}next` or - // `{,inlined_}next_desugared`, depending on whether - // `desugar_doc_comments` is set. + // Counts the number of calls to `{,inlined_}next`. num_next_calls: usize, // During parsing, we may sometimes need to 'unglue' a // glued token into two component tokens @@ -256,14 +254,14 @@ impl TokenCursorFrame { } impl TokenCursor { - fn next(&mut self) -> (Token, Spacing) { - self.inlined_next() + fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { + self.inlined_next(desugar_doc_comments) } /// This always-inlined version should only be used on hot code paths. 
#[inline(always)] - fn inlined_next(&mut self) -> (Token, Spacing) { - loop { + fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { + let (token, spacing) = loop { let (tree, spacing) = if !self.frame.open_delim { self.frame.open_delim = true; TokenTree::token(token::OpenDelim(self.frame.delim), self.frame.span.open).into() @@ -281,77 +279,74 @@ impl TokenCursor { match tree { TokenTree::Token(token) => { - return (token, spacing); + break (token, spacing); } TokenTree::Delimited(sp, delim, tts) => { let frame = TokenCursorFrame::new(sp, delim, tts); self.stack.push(mem::replace(&mut self.frame, frame)); } } - } - } - - fn next_desugared(&mut self) -> (Token, Spacing) { - self.inlined_next_desugared() - } - - /// This always-inlined version should only be used on hot code paths. - #[inline(always)] - fn inlined_next_desugared(&mut self) -> (Token, Spacing) { - let (data, attr_style, sp) = match self.inlined_next() { - (Token { kind: token::DocComment(_, attr_style, data), span }, _) => { - (data, attr_style, span) - } - tok => return tok, }; - // Searches for the occurrences of `"#*` and returns the minimum number of `#`s - // required to wrap the text. - let mut num_of_hashes = 0; - let mut count = 0; - for ch in data.as_str().chars() { - count = match ch { - '"' => 1, - '#' if count > 0 => count + 1, - _ => 0, - }; - num_of_hashes = cmp::max(num_of_hashes, count); + match (desugar_doc_comments, &token) { + (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { + // Searches for the occurrences of `"#*` and returns the minimum number of `#`s + // required to wrap the text. 
+ let mut num_of_hashes = 0; + let mut count = 0; + for ch in data.as_str().chars() { + count = match ch { + '"' => 1, + '#' if count > 0 => count + 1, + _ => 0, + }; + num_of_hashes = cmp::max(num_of_hashes, count); + } + + let delim_span = DelimSpan::from_single(span); + let body = TokenTree::Delimited( + delim_span, + token::Bracket, + [ + TokenTree::token(token::Ident(sym::doc, false), span), + TokenTree::token(token::Eq, span), + TokenTree::token( + TokenKind::lit(token::StrRaw(num_of_hashes), data, None), + span, + ), + ] + .iter() + .cloned() + .collect::(), + ); + + self.stack.push(mem::replace( + &mut self.frame, + TokenCursorFrame::new( + delim_span, + token::NoDelim, + if attr_style == AttrStyle::Inner { + [ + TokenTree::token(token::Pound, span), + TokenTree::token(token::Not, span), + body, + ] + .iter() + .cloned() + .collect::() + } else { + [TokenTree::token(token::Pound, span), body] + .iter() + .cloned() + .collect::() + }, + ), + )); + + self.next(/* desugar_doc_comments */ false) + } + _ => (token, spacing), } - - let delim_span = DelimSpan::from_single(sp); - let body = TokenTree::Delimited( - delim_span, - token::Bracket, - [ - TokenTree::token(token::Ident(sym::doc, false), sp), - TokenTree::token(token::Eq, sp), - TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp), - ] - .iter() - .cloned() - .collect::(), - ); - - self.stack.push(mem::replace( - &mut self.frame, - TokenCursorFrame::new( - delim_span, - token::NoDelim, - if attr_style == AttrStyle::Inner { - [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body] - .iter() - .cloned() - .collect::() - } else { - [TokenTree::token(token::Pound, sp), body] - .iter() - .cloned() - .collect::() - }, - ), - )); - - self.next() } } @@ -1010,11 +1005,7 @@ impl<'a> Parser<'a> { pub fn bump(&mut self) { let fallback_span = self.token.span; loop { - let (mut next, spacing) = if self.desugar_doc_comments { - self.token_cursor.inlined_next_desugared() - } 
else { - self.token_cursor.inlined_next() - }; + let (mut next, spacing) = self.token_cursor.inlined_next(self.desugar_doc_comments); self.token_cursor.num_next_calls += 1; // We've retrieved an token from the underlying // cursor, so we no longer need to worry about @@ -1063,7 +1054,7 @@ impl<'a> Parser<'a> { let mut i = 0; let mut token = Token::dummy(); while i < dist { - token = cursor.next().0; + token = cursor.next(/* desugar_doc_comments */ false).0; if matches!( token.kind, token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) From 02317542ebceef780ddac660d751cbe268f56105 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Apr 2022 13:41:02 +1000 Subject: [PATCH 05/20] Rearrange `TokenCursor::inlined_next()`. In particular, avoid wrapping a token within `TokenTree::Token` and then immediately matching it and returning the token within. Just return the token immediately. --- compiler/rustc_parse/src/parser/mod.rs | 37 ++++++++++++++------------ 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index b6f4cd119e0..513a95234ab 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -262,29 +262,32 @@ impl TokenCursor { #[inline(always)] fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { let (token, spacing) = loop { - let (tree, spacing) = if !self.frame.open_delim { + if !self.frame.open_delim { self.frame.open_delim = true; - TokenTree::token(token::OpenDelim(self.frame.delim), self.frame.span.open).into() - } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() { - tree + return ( + Token::new(token::OpenDelim(self.frame.delim), self.frame.span.open), + Spacing::Alone, + ); + } else if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing() { + match tree { + TokenTree::Token(token) => { + break (token, spacing); + } + 
TokenTree::Delimited(sp, delim, tts) => { + let frame = TokenCursorFrame::new(sp, delim, tts); + self.stack.push(mem::replace(&mut self.frame, frame)); + } + } } else if !self.frame.close_delim { self.frame.close_delim = true; - TokenTree::token(token::CloseDelim(self.frame.delim), self.frame.span.close).into() + return ( + Token::new(token::CloseDelim(self.frame.delim), self.frame.span.close), + Spacing::Alone, + ); } else if let Some(frame) = self.stack.pop() { self.frame = frame; - continue; } else { - (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone) - }; - - match tree { - TokenTree::Token(token) => { - break (token, spacing); - } - TokenTree::Delimited(sp, delim, tts) => { - let frame = TokenCursorFrame::new(sp, delim, tts); - self.stack.push(mem::replace(&mut self.frame, frame)); - } + return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } }; From 29c78cc086d55b46401f3ba9ca89ad6e95c57c8d Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Apr 2022 13:51:12 +1000 Subject: [PATCH 06/20] Add {open,close}_delim arguments to `TokenCursorFrame::new()`. This will facilitate the change in the next commit. `boolean` arguments aren't great, but the function is only used in three places within this one file. 
--- compiler/rustc_parse/src/parser/mod.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 513a95234ab..2e2fc6694a5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -242,14 +242,14 @@ struct TokenCursorFrame { } impl TokenCursorFrame { - fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self { - TokenCursorFrame { - delim, - span, - open_delim: false, - tree_cursor: tts.into_trees(), - close_delim: false, - } + fn new( + span: DelimSpan, + delim: DelimToken, + open_delim: bool, + tts: TokenStream, + close_delim: bool, + ) -> Self { + TokenCursorFrame { delim, span, open_delim, tree_cursor: tts.into_trees(), close_delim } } } @@ -274,7 +274,7 @@ impl TokenCursor { break (token, spacing); } TokenTree::Delimited(sp, delim, tts) => { - let frame = TokenCursorFrame::new(sp, delim, tts); + let frame = TokenCursorFrame::new(sp, delim, false, tts, false); self.stack.push(mem::replace(&mut self.frame, frame)); } } @@ -328,6 +328,7 @@ impl TokenCursor { TokenCursorFrame::new( delim_span, token::NoDelim, + false, if attr_style == AttrStyle::Inner { [ TokenTree::token(token::Pound, span), @@ -343,6 +344,7 @@ impl TokenCursor { .cloned() .collect::() }, + false, ), )); @@ -434,9 +436,8 @@ impl<'a> Parser<'a> { desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { - let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); - start_frame.open_delim = true; - start_frame.close_delim = true; + let start_frame = + TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, true, tokens, true); let mut parser = Parser { sess, From d235ac7801367afcdd0712c43dd53fab7d6ff95b Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Apr 2022 13:53:05 +1000 Subject: [PATCH 07/20] Handle `Delimited` opening immediately. 
Instead of letting the next iteration of the loop handle it. --- compiler/rustc_parse/src/parser/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 2e2fc6694a5..20f8ab8a6e7 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -274,8 +274,10 @@ impl TokenCursor { break (token, spacing); } TokenTree::Delimited(sp, delim, tts) => { - let frame = TokenCursorFrame::new(sp, delim, false, tts, false); + // Set `open_delim` to true here because we deal with it immediately. + let frame = TokenCursorFrame::new(sp, delim, true, tts, false); self.stack.push(mem::replace(&mut self.frame, frame)); + return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); } } } else if !self.frame.close_delim { From f1c32c10c476c5a77c662ce70cb04639cda3eb4b Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 19 Apr 2022 17:01:26 +1000 Subject: [PATCH 08/20] Move desugaring code into its own function. It's not hot, so shouldn't be within the always inlined part. --- compiler/rustc_parse/src/parser/mod.rs | 131 ++++++++++++------------- 1 file changed, 62 insertions(+), 69 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 20f8ab8a6e7..925d6ac405b 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -261,7 +261,7 @@ impl TokenCursor { /// This always-inlined version should only be used on hot code paths. 
#[inline(always)] fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { - let (token, spacing) = loop { + loop { if !self.frame.open_delim { self.frame.open_delim = true; return ( @@ -269,17 +269,20 @@ impl TokenCursor { Spacing::Alone, ); } else if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing() { - match tree { - TokenTree::Token(token) => { - break (token, spacing); - } + return match tree { + TokenTree::Token(token) => match (desugar_doc_comments, &token) { + (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { + self.desugar(attr_style, data, span) + } + _ => (token, spacing), + }, TokenTree::Delimited(sp, delim, tts) => { // Set `open_delim` to true here because we deal with it immediately. let frame = TokenCursorFrame::new(sp, delim, true, tts, false); self.stack.push(mem::replace(&mut self.frame, frame)); - return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); + (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone) } - } + }; } else if !self.frame.close_delim { self.frame.close_delim = true; return ( @@ -291,70 +294,60 @@ impl TokenCursor { } else { return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } - }; - - match (desugar_doc_comments, &token) { - (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { - // Searches for the occurrences of `"#*` and returns the minimum number of `#`s - // required to wrap the text. 
- let mut num_of_hashes = 0; - let mut count = 0; - for ch in data.as_str().chars() { - count = match ch { - '"' => 1, - '#' if count > 0 => count + 1, - _ => 0, - }; - num_of_hashes = cmp::max(num_of_hashes, count); - } - - let delim_span = DelimSpan::from_single(span); - let body = TokenTree::Delimited( - delim_span, - token::Bracket, - [ - TokenTree::token(token::Ident(sym::doc, false), span), - TokenTree::token(token::Eq, span), - TokenTree::token( - TokenKind::lit(token::StrRaw(num_of_hashes), data, None), - span, - ), - ] - .iter() - .cloned() - .collect::(), - ); - - self.stack.push(mem::replace( - &mut self.frame, - TokenCursorFrame::new( - delim_span, - token::NoDelim, - false, - if attr_style == AttrStyle::Inner { - [ - TokenTree::token(token::Pound, span), - TokenTree::token(token::Not, span), - body, - ] - .iter() - .cloned() - .collect::() - } else { - [TokenTree::token(token::Pound, span), body] - .iter() - .cloned() - .collect::() - }, - false, - ), - )); - - self.next(/* desugar_doc_comments */ false) - } - _ => (token, spacing), } } + + fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { + // Searches for the occurrences of `"#*` and returns the minimum number of `#`s + // required to wrap the text. 
+ let mut num_of_hashes = 0; + let mut count = 0; + for ch in data.as_str().chars() { + count = match ch { + '"' => 1, + '#' if count > 0 => count + 1, + _ => 0, + }; + num_of_hashes = cmp::max(num_of_hashes, count); + } + + let delim_span = DelimSpan::from_single(span); + let body = TokenTree::Delimited( + delim_span, + token::Bracket, + [ + TokenTree::token(token::Ident(sym::doc, false), span), + TokenTree::token(token::Eq, span), + TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), span), + ] + .iter() + .cloned() + .collect::(), + ); + + self.stack.push(mem::replace( + &mut self.frame, + TokenCursorFrame::new( + delim_span, + token::NoDelim, + false, + if attr_style == AttrStyle::Inner { + [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body] + .iter() + .cloned() + .collect::() + } else { + [TokenTree::token(token::Pound, span), body] + .iter() + .cloned() + .collect::() + }, + false, + ), + )); + + self.next(/* desugar_doc_comments */ false) + } } #[derive(Debug, Clone, PartialEq)] From 804103b0aedf38cbc1a0c0dbf37a627eb48b8902 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 09:31:34 +1000 Subject: [PATCH 09/20] Add a size assertion for `Parser`. --- compiler/rustc_parse/src/parser/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 925d6ac405b..67bfb98ccd3 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -150,6 +150,11 @@ pub struct Parser<'a> { pub current_closure: Option, } +// This type is used a lot, e.g. it's cloned when matching many declarative macro rules. Make sure +// it doesn't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(Parser<'_>, 328); + /// Stores span information about a closure. 
#[derive(Clone)] pub struct ClosureSpans { From 86723d3d468745aa5e6494a766ea9ba6cb283b3c Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 11:49:15 +1000 Subject: [PATCH 10/20] Use `true` for `open_delim`/`close_delim` in one spot. The `DelimToken` here is `NoDelim`, which means the returned delim tokens will just be ignored by `Parser::bump()`. This commit changes things so the delim tokens won't be returned. --- compiler/rustc_parse/src/parser/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 67bfb98ccd3..a337d8fae0f 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -335,7 +335,7 @@ impl TokenCursor { TokenCursorFrame::new( delim_span, token::NoDelim, - false, + true, if attr_style == AttrStyle::Inner { [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body] .iter() @@ -347,7 +347,7 @@ impl TokenCursor { .cloned() .collect::() }, - false, + true, ), )); From 3cd5e346170437f844d18c17d3f870dc4722c96a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 12:22:03 +1000 Subject: [PATCH 11/20] Remove `TokenCursorFrame::open_delim`. Because it's now always true. 
--- compiler/rustc_parse/src/parser/mod.rs | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index a337d8fae0f..a276fb53895 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -241,20 +241,13 @@ struct TokenCursor { struct TokenCursorFrame { delim: token::DelimToken, span: DelimSpan, - open_delim: bool, tree_cursor: tokenstream::Cursor, close_delim: bool, } impl TokenCursorFrame { - fn new( - span: DelimSpan, - delim: DelimToken, - open_delim: bool, - tts: TokenStream, - close_delim: bool, - ) -> Self { - TokenCursorFrame { delim, span, open_delim, tree_cursor: tts.into_trees(), close_delim } + fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, close_delim: bool) -> Self { + TokenCursorFrame { delim, span, tree_cursor: tts.into_trees(), close_delim } } } @@ -267,13 +260,7 @@ impl TokenCursor { #[inline(always)] fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { loop { - if !self.frame.open_delim { - self.frame.open_delim = true; - return ( - Token::new(token::OpenDelim(self.frame.delim), self.frame.span.open), - Spacing::Alone, - ); - } else if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing() { + if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing() { return match tree { TokenTree::Token(token) => match (desugar_doc_comments, &token) { (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { @@ -283,7 +270,7 @@ impl TokenCursor { }, TokenTree::Delimited(sp, delim, tts) => { // Set `open_delim` to true here because we deal with it immediately. 
- let frame = TokenCursorFrame::new(sp, delim, true, tts, false); + let frame = TokenCursorFrame::new(sp, delim, tts, false); self.stack.push(mem::replace(&mut self.frame, frame)); (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone) } @@ -335,7 +322,6 @@ impl TokenCursor { TokenCursorFrame::new( delim_span, token::NoDelim, - true, if attr_style == AttrStyle::Inner { [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body] .iter() @@ -436,8 +422,7 @@ impl<'a> Parser<'a> { desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { - let start_frame = - TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, true, tokens, true); + let start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens, true); let mut parser = Parser { sess, From b09522a634f5c6c53dd01ad60a37b6503355ce88 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 12:22:42 +1000 Subject: [PATCH 12/20] Remove the loop from `Parser::bump()`. The loop is there to handle a `NoDelim` open/close token. This commit changes `TokenCursor::inlined_next` so it never returns such a token. This is a performance win because the conditional test in `bump()` is removed. If the parser needs changing in the future to handle `NoDelim` tokens, then `inlined_next()` can easily be changed to return them. 
--- compiler/rustc_parse/src/parser/mod.rs | 63 ++++++++++++++------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index a276fb53895..173e0c2d529 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -242,12 +242,17 @@ struct TokenCursorFrame { delim: token::DelimToken, span: DelimSpan, tree_cursor: tokenstream::Cursor, - close_delim: bool, + need_to_produce_close_delim: bool, } impl TokenCursorFrame { - fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, close_delim: bool) -> Self { - TokenCursorFrame { delim, span, tree_cursor: tts.into_trees(), close_delim } + fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self { + TokenCursorFrame { + delim, + span, + tree_cursor: tts.into_trees(), + need_to_produce_close_delim: delim != DelimToken::NoDelim, + } } } @@ -261,28 +266,32 @@ impl TokenCursor { fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { loop { if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing() { - return match tree { + match tree { TokenTree::Token(token) => match (desugar_doc_comments, &token) { (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { - self.desugar(attr_style, data, span) + return self.desugar(attr_style, data, span); } - _ => (token, spacing), + _ => return (token, spacing), }, TokenTree::Delimited(sp, delim, tts) => { // Set `open_delim` to true here because we deal with it immediately. - let frame = TokenCursorFrame::new(sp, delim, tts, false); + let frame = TokenCursorFrame::new(sp, delim, tts); self.stack.push(mem::replace(&mut self.frame, frame)); - (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone) + if delim != DelimToken::NoDelim { + return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); + } + // No open delimeter to return; continue on to the next iteration. 
} }; - } else if !self.frame.close_delim { - self.frame.close_delim = true; + } else if self.frame.need_to_produce_close_delim { + self.frame.need_to_produce_close_delim = false; return ( Token::new(token::CloseDelim(self.frame.delim), self.frame.span.close), Spacing::Alone, ); } else if let Some(frame) = self.stack.pop() { self.frame = frame; + // Back to the parent frame; continue on to the next iteration. } else { return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } @@ -333,7 +342,6 @@ impl TokenCursor { .cloned() .collect::() }, - true, ), )); @@ -422,7 +430,7 @@ impl<'a> Parser<'a> { desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { - let start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens, true); + let start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); let mut parser = Parser { sess, @@ -993,24 +1001,21 @@ impl<'a> Parser<'a> { /// Advance the parser by one token. pub fn bump(&mut self) { let fallback_span = self.token.span; - loop { - let (mut next, spacing) = self.token_cursor.inlined_next(self.desugar_doc_comments); - self.token_cursor.num_next_calls += 1; - // We've retrieved an token from the underlying - // cursor, so we no longer need to worry about - // an unglued token. See `break_and_eat` for more details - self.token_cursor.break_last_token = false; - if next.span.is_dummy() { - // Tweak the location for better diagnostics, but keep syntactic context intact. - next.span = fallback_span.with_ctxt(next.span.ctxt()); - } - if !matches!( - next.kind, - token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) - ) { - return self.inlined_bump_with((next, spacing)); - } + let (mut next, spacing) = self.token_cursor.inlined_next(self.desugar_doc_comments); + self.token_cursor.num_next_calls += 1; + // We've retrieved an token from the underlying + // cursor, so we no longer need to worry about + // an unglued token. 
See `break_and_eat` for more details + self.token_cursor.break_last_token = false; + if next.span.is_dummy() { + // Tweak the location for better diagnostics, but keep syntactic context intact. + next.span = fallback_span.with_ctxt(next.span.ctxt()); } + debug_assert!(!matches!( + next.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + )); + self.inlined_bump_with((next, spacing)) } /// Look-ahead `dist` tokens of `self.token` and get access to that token there. From 5b653c1a43b88d39a127b4b24dac327080151268 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 12:43:25 +1000 Subject: [PATCH 13/20] Inline `Cursor::next_with_spacing`. --- compiler/rustc_ast/src/tokenstream.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 857dc307c98..3321d3bf380 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -575,6 +575,7 @@ impl Cursor { Cursor { stream, index: 0 } } + #[inline] pub fn next_with_spacing(&mut self) -> Option { self.stream.0.get(self.index).map(|tree| { self.index += 1; From 9e6879fdba34e5f5b2657b6666f00f6091b89545 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 14:04:22 +1000 Subject: [PATCH 14/20] Only record `fallback_span` when necessary. --- compiler/rustc_parse/src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 173e0c2d529..28be9f4b592 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -1000,7 +1000,6 @@ impl<'a> Parser<'a> { /// Advance the parser by one token. 
pub fn bump(&mut self) { - let fallback_span = self.token.span; let (mut next, spacing) = self.token_cursor.inlined_next(self.desugar_doc_comments); self.token_cursor.num_next_calls += 1; // We've retrieved an token from the underlying @@ -1009,6 +1008,7 @@ impl<'a> Parser<'a> { self.token_cursor.break_last_token = false; if next.span.is_dummy() { // Tweak the location for better diagnostics, but keep syntactic context intact. + let fallback_span = self.token.span; next.span = fallback_span.with_ctxt(next.span.ctxt()); } debug_assert!(!matches!( From f9235db37ea7253d5cd668b0155efad39530ffdb Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 14:08:59 +1000 Subject: [PATCH 15/20] Inline `Parser::parse_nonterminal`. --- compiler/rustc_parse/src/parser/nonterminal.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index b45bca3d2e0..c5caa3ea636 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -95,7 +95,9 @@ impl<'a> Parser<'a> { } } - /// Parse a non-terminal (e.g. MBE `:pat` or `:ident`). + /// Parse a non-terminal (e.g. MBE `:pat` or `:ident`). Inlined because there is only one call + /// site. + #[inline] pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, NtOrTt> { // Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`) // needs to have them force-captured here. From d2b9bbbf783e2921fbca28eae06314e8b7f8a89a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 14:13:49 +1000 Subject: [PATCH 16/20] Inline `Parser::nonterminal_may_begin_with`. 
--- compiler/rustc_parse/src/parser/nonterminal.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index c5caa3ea636..691bfdb01a4 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -11,8 +11,10 @@ use crate::parser::{FollowedByType, ForceCollect, NtOrTt, Parser, PathStyle}; impl<'a> Parser<'a> { /// Checks whether a non-terminal may begin with a particular token. /// - /// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that - /// token. Be conservative (return true) if not sure. + /// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with + /// that token. Be conservative (return true) if not sure. Inlined because it has a single call + /// site. + #[inline] pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool { /// Checks whether the non-terminal may contain a single (non-keyword) identifier. fn may_be_ident(nt: &token::Nonterminal) -> bool { From 880318c70a3cf676acae9c1e61ac6519f7f67f46 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 14:52:54 +1000 Subject: [PATCH 17/20] Remove `Eof` sanity check in `Parser::inlined_bump_with`. A Google search of the error message fails to return any relevant results, suggesting this has never occurred in practice. And removing it reduces instruction counts by up to 2% on some benchmarks. --- compiler/rustc_parse/src/parser/mod.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 28be9f4b592..581d706f682 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -984,12 +984,6 @@ impl<'a> Parser<'a> { /// This always-inlined version should only be used on hot code paths. 
#[inline(always)] fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) { - // Bumping after EOF is a bad sign, usually an infinite loop. - if self.prev_token.kind == TokenKind::Eof { - let msg = "attempted to bump the parser past EOF (may be stuck in a loop)"; - self.span_bug(self.token.span, msg); - } - // Update the current and previous tokens. self.prev_token = mem::replace(&mut self.token, next_token); self.token_spacing = next_spacing; From 7a89255b20d6fcbaf96ceeddcf6de119ee4ae0a5 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 20 Apr 2022 16:34:33 +1000 Subject: [PATCH 18/20] Avoid some tuple destructuring. Surprisingly, this is a non-trivial performance win. --- compiler/rustc_parse/src/parser/mod.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 581d706f682..450bdb510a5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -994,22 +994,24 @@ impl<'a> Parser<'a> { /// Advance the parser by one token. pub fn bump(&mut self) { - let (mut next, spacing) = self.token_cursor.inlined_next(self.desugar_doc_comments); + // Note: destructuring here would give nicer code, but it was found in #96210 to be slower + // than `.0`/`.1` access. + let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments); self.token_cursor.num_next_calls += 1; // We've retrieved an token from the underlying // cursor, so we no longer need to worry about // an unglued token. See `break_and_eat` for more details self.token_cursor.break_last_token = false; - if next.span.is_dummy() { + if next.0.span.is_dummy() { // Tweak the location for better diagnostics, but keep syntactic context intact. 
let fallback_span = self.token.span; - next.span = fallback_span.with_ctxt(next.span.ctxt()); + next.0.span = fallback_span.with_ctxt(next.0.span.ctxt()); } debug_assert!(!matches!( - next.kind, + next.0.kind, token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) )); - self.inlined_bump_with((next, spacing)) + self.inlined_bump_with(next) } /// Look-ahead `dist` tokens of `self.token` and get access to that token there. From cc4e3443ecf96f395e598b14af208d36a11ffb9f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 21 Apr 2022 12:26:58 +1000 Subject: [PATCH 19/20] Produce `CloseDelim` and pop the stack at the same time. This makes `CloseDelim` handling more like `OpenDelim` handling, which produces `OpenDelim` and pushes the stack at the same time. It requires some adjustment to `parse_token_tree` now that we don't remain within the frame after getting the `CloseDelim`. --- compiler/rustc_parse/src/parser/mod.rs | 63 ++++++++++++++------------ 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 450bdb510a5..a620266247a 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -208,7 +208,12 @@ impl<'a> Drop for Parser<'a> { #[derive(Clone)] struct TokenCursor { + // The current (innermost) frame. `frame` and `stack` could be combined, + // but it's faster to have them separately to access `frame` directly + // rather than via something like `stack.last().unwrap()` or + // `stack[stack.len() - 1]`. frame: TokenCursorFrame, + // Additional frames that enclose `frame`. stack: Vec, desugar_doc_comments: bool, // Counts the number of calls to `{,inlined_}next`. 
@@ -242,17 +247,11 @@ struct TokenCursorFrame { delim: token::DelimToken, span: DelimSpan, tree_cursor: tokenstream::Cursor, - need_to_produce_close_delim: bool, } impl TokenCursorFrame { fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self { - TokenCursorFrame { - delim, - span, - tree_cursor: tts.into_trees(), - need_to_produce_close_delim: delim != DelimToken::NoDelim, - } + TokenCursorFrame { delim, span, tree_cursor: tts.into_trees() } } } @@ -265,6 +264,9 @@ impl TokenCursor { #[inline(always)] fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { loop { + // FIXME: we currently don't return `NoDelim` open/close delims. To fix #67062 we will + // need to, whereupon the `delim != DelimToken::NoDelim` conditions below can be + // removed, as well as the loop. if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing() { match tree { TokenTree::Token(token) => match (desugar_doc_comments, &token) { @@ -283,15 +285,14 @@ impl TokenCursor { // No open delimeter to return; continue on to the next iteration. } }; - } else if self.frame.need_to_produce_close_delim { - self.frame.need_to_produce_close_delim = false; - return ( - Token::new(token::CloseDelim(self.frame.delim), self.frame.span.close), - Spacing::Alone, - ); } else if let Some(frame) = self.stack.pop() { + let delim = self.frame.delim; + let span = self.frame.span; self.frame = frame; - // Back to the parent frame; continue on to the next iteration. + if delim != DelimToken::NoDelim { + return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); + } + // No close delimiter to return; continue on to the next iteration. } else { return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } @@ -430,6 +431,8 @@ impl<'a> Parser<'a> { desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { + // Note: because of the way `TokenCursor::inlined_next` is structured, the `span` and + // `delim` arguments here are never used. 
let start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); let mut parser = Parser { @@ -1192,24 +1195,28 @@ impl<'a> Parser<'a> { pub(crate) fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { token::OpenDelim(..) => { - let depth = self.token_cursor.stack.len(); - - // We keep advancing the token cursor until we hit - // the matching `CloseDelim` token. - while !(depth == self.token_cursor.stack.len() - && matches!(self.token.kind, token::CloseDelim(_))) - { - // Advance one token at a time, so `TokenCursor::next()` - // can capture these tokens if necessary. - self.bump(); - } - // We are still inside the frame corresponding - // to the delimited stream we captured, so grab - // the tokens from this frame. + // Grab the tokens from this frame. let frame = &self.token_cursor.frame; let stream = frame.tree_cursor.stream.clone(); let span = frame.span; let delim = frame.delim; + + // Advance the token cursor through the entire delimited + // sequence. After getting the `OpenDelim` we are *within* the + // delimited sequence, i.e. at depth `d`. After getting the + // matching `CloseDelim` we are *after* the delimited sequence, + // i.e. at depth `d - 1`. + let target_depth = self.token_cursor.stack.len() - 1; + loop { + // Advance one token at a time, so `TokenCursor::next()` + // can capture these tokens if necessary. + self.bump(); + if self.token_cursor.stack.len() == target_depth { + debug_assert!(matches!(self.token.kind, token::CloseDelim(_))); + break; + } + } + // Consume close delimiter self.bump(); TokenTree::Delimited(span, delim, stream) From 643e9f707ed4ca13a158b6e290b424e520809ca6 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 21 Apr 2022 13:49:40 +1000 Subject: [PATCH 20/20] Introduced `Cursor::next_with_spacing_ref`. This lets us clone just the parts within a `TokenTree` that need cloning, rather than the entire thing. 
This is a surprisingly large performance win, up to 4% on `async-std-1.10.0`. --- compiler/rustc_ast/src/tokenstream.rs | 8 ++++++++ compiler/rustc_parse/src/parser/mod.rs | 10 +++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 3321d3bf380..d609fa67205 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -583,6 +583,14 @@ impl Cursor { }) } + #[inline] + pub fn next_with_spacing_ref(&mut self) -> Option<&TreeAndSpacing> { + self.stream.0.get(self.index).map(|tree| { + self.index += 1; + tree + }) + } + pub fn index(&self) -> usize { self.index } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index a620266247a..1686c5873e1 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -267,17 +267,17 @@ impl TokenCursor { // FIXME: we currently don't return `NoDelim` open/close delims. To fix #67062 we will // need to, whereupon the `delim != DelimToken::NoDelim` conditions below can be // removed, as well as the loop. - if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing() { + if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing_ref() { match tree { - TokenTree::Token(token) => match (desugar_doc_comments, &token) { + &TokenTree::Token(ref token) => match (desugar_doc_comments, token) { (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { return self.desugar(attr_style, data, span); } - _ => return (token, spacing), + _ => return (token.clone(), *spacing), }, - TokenTree::Delimited(sp, delim, tts) => { + &TokenTree::Delimited(sp, delim, ref tts) => { // Set `open_delim` to true here because we deal with it immediately. 
- let frame = TokenCursorFrame::new(sp, delim, tts); + let frame = TokenCursorFrame::new(sp, delim, tts.clone()); self.stack.push(mem::replace(&mut self.frame, frame)); if delim != DelimToken::NoDelim { return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);