Auto merge of #96210 - nnethercote:speed-up-TokenCursor, r=petrochenkov

Speed up `TokenCursor`

Plus a few related clean-ups.

r? `@petrochenkov`
This commit is contained in:
bors 2022-04-21 15:42:50 +00:00
commit b04c5329e1
4 changed files with 119 additions and 141 deletions

View File

@ -94,16 +94,6 @@ impl TokenTree {
TokenTree::Token(Token::new(kind, span))
}
/// Builds the token tree for the opening delimiter `delim`, positioned at `span.open`.
pub fn open_tt(span: DelimSpan, delim: DelimToken) -> TokenTree {
    let kind = token::OpenDelim(delim);
    TokenTree::token(kind, span.open)
}
/// Builds the token tree for the closing delimiter `delim`, positioned at `span.close`.
pub fn close_tt(span: DelimSpan, delim: DelimToken) -> TokenTree {
    let kind = token::CloseDelim(delim);
    TokenTree::token(kind, span.close)
}
pub fn uninterpolate(self) -> TokenTree {
match self {
TokenTree::Token(token) => TokenTree::Token(token.uninterpolate().into_owned()),
@ -585,13 +575,20 @@ impl Cursor {
Cursor { stream, index: 0 }
}
#[inline]
pub fn next_with_spacing(&mut self) -> Option<TreeAndSpacing> {
if self.index < self.stream.len() {
self.stream.0.get(self.index).map(|tree| {
self.index += 1;
Some(self.stream.0[self.index - 1].clone())
} else {
None
}
tree.clone()
})
}
/// Advances the cursor by one entry and returns a reference to it, without cloning.
///
/// Returns `None`, leaving `index` untouched, once `index` is past the end of the stream.
#[inline]
pub fn next_with_spacing_ref(&mut self) -> Option<&TreeAndSpacing> {
    // Bail out before touching `index` when there is nothing left to yield.
    let tree = self.stream.0.get(self.index)?;
    self.index += 1;
    Some(tree)
}
pub fn index(&self) -> usize {

View File

@ -100,21 +100,16 @@ rustc_data_structures::static_assert_size!(LazyTokenStreamImpl, 144);
impl CreateTokenStream for LazyTokenStreamImpl {
fn create_token_stream(&self) -> AttrAnnotatedTokenStream {
// The token produced by the final call to `{,inlined_}next` or
// `{,inlined_}next_desugared` was not actually consumed by the
// callback. The combination of chaining the initial token and using
// `take` produces the desired result - we produce an empty
// `TokenStream` if no calls were made, and omit the final token
// otherwise.
// The token produced by the final call to `{,inlined_}next` was not
// actually consumed by the callback. The combination of chaining the
// initial token and using `take` produces the desired result - we
// produce an empty `TokenStream` if no calls were made, and omit the
// final token otherwise.
let mut cursor_snapshot = self.cursor_snapshot.clone();
let tokens =
std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
.chain((0..self.num_calls).map(|_| {
let token = if cursor_snapshot.desugar_doc_comments {
cursor_snapshot.next_desugared()
} else {
cursor_snapshot.next()
};
let token = cursor_snapshot.next(cursor_snapshot.desugar_doc_comments);
(FlatToken::Token(token.0), token.1)
}))
.take(self.num_calls);

View File

@ -123,8 +123,8 @@ pub struct Parser<'a> {
pub capture_cfg: bool,
restrictions: Restrictions,
expected_tokens: Vec<TokenType>,
// Important: This must only be advanced from `next_tok`
// to ensure that `token_cursor.num_next_calls` is updated properly
// Important: This must only be advanced from `bump` to ensure that
// `token_cursor.num_next_calls` is updated properly.
token_cursor: TokenCursor,
desugar_doc_comments: bool,
/// This field is used to keep track of how many left angle brackets we have seen. This is
@ -150,6 +150,11 @@ pub struct Parser<'a> {
pub current_closure: Option<ClosureSpans>,
}
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules. Make sure
// it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(Parser<'_>, 328);
/// Stores span information about a closure.
#[derive(Clone)]
pub struct ClosureSpans {
@ -203,12 +208,15 @@ impl<'a> Drop for Parser<'a> {
#[derive(Clone)]
struct TokenCursor {
// The current (innermost) frame. `frame` and `stack` could be combined,
// but it's faster to have them separately to access `frame` directly
// rather than via something like `stack.last().unwrap()` or
// `stack[stack.len() - 1]`.
frame: TokenCursorFrame,
// Additional frames that enclose `frame`.
stack: Vec<TokenCursorFrame>,
desugar_doc_comments: bool,
// Counts the number of calls to `{,inlined_}next` or
// `{,inlined_}next_desugared`, depending on whether
// `desugar_doc_comments` is set.
// Counts the number of calls to `{,inlined_}next`.
num_next_calls: usize,
// During parsing, we may sometimes need to 'unglue' a
// glued token into two component tokens
@ -238,73 +246,60 @@ struct TokenCursor {
struct TokenCursorFrame {
delim: token::DelimToken,
span: DelimSpan,
open_delim: bool,
tree_cursor: tokenstream::Cursor,
close_delim: bool,
}
impl TokenCursorFrame {
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
TokenCursorFrame {
delim,
span,
open_delim: false,
tree_cursor: tts.into_trees(),
close_delim: false,
}
TokenCursorFrame { delim, span, tree_cursor: tts.into_trees() }
}
}
impl TokenCursor {
fn next(&mut self) -> (Token, Spacing) {
self.inlined_next()
fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
self.inlined_next(desugar_doc_comments)
}
/// This always-inlined version should only be used on hot code paths.
#[inline(always)]
fn inlined_next(&mut self) -> (Token, Spacing) {
fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
loop {
let (tree, spacing) = if !self.frame.open_delim {
self.frame.open_delim = true;
TokenTree::open_tt(self.frame.span, self.frame.delim).into()
} else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() {
tree
} else if !self.frame.close_delim {
self.frame.close_delim = true;
TokenTree::close_tt(self.frame.span, self.frame.delim).into()
// FIXME: we currently don't return `NoDelim` open/close delims. To fix #67062 we will
// need to, whereupon the `delim != DelimToken::NoDelim` conditions below can be
// removed, as well as the loop.
if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing_ref() {
match tree {
&TokenTree::Token(ref token) => match (desugar_doc_comments, token) {
(true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
return self.desugar(attr_style, data, span);
}
_ => return (token.clone(), *spacing),
},
&TokenTree::Delimited(sp, delim, ref tts) => {
// Enter the delimited sequence: push a new frame and return its open delimiter (if any) right away.
let frame = TokenCursorFrame::new(sp, delim, tts.clone());
self.stack.push(mem::replace(&mut self.frame, frame));
if delim != DelimToken::NoDelim {
return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
}
// No open delimiter to return; continue on to the next iteration.
}
};
} else if let Some(frame) = self.stack.pop() {
let delim = self.frame.delim;
let span = self.frame.span;
self.frame = frame;
continue;
if delim != DelimToken::NoDelim {
return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
}
// No close delimiter to return; continue on to the next iteration.
} else {
(TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone)
};
match tree {
TokenTree::Token(token) => {
return (token, spacing);
}
TokenTree::Delimited(sp, delim, tts) => {
let frame = TokenCursorFrame::new(sp, delim, tts);
self.stack.push(mem::replace(&mut self.frame, frame));
}
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
}
}
}
fn next_desugared(&mut self) -> (Token, Spacing) {
self.inlined_next_desugared()
}
/// This always-inlined version should only be used on hot code paths.
#[inline(always)]
fn inlined_next_desugared(&mut self) -> (Token, Spacing) {
let (data, attr_style, sp) = match self.inlined_next() {
(Token { kind: token::DocComment(_, attr_style, data), span }, _) => {
(data, attr_style, span)
}
tok => return tok,
};
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
// required to wrap the text.
let mut num_of_hashes = 0;
@ -318,14 +313,14 @@ impl TokenCursor {
num_of_hashes = cmp::max(num_of_hashes, count);
}
let delim_span = DelimSpan::from_single(sp);
let delim_span = DelimSpan::from_single(span);
let body = TokenTree::Delimited(
delim_span,
token::Bracket,
[
TokenTree::token(token::Ident(sym::doc, false), sp),
TokenTree::token(token::Eq, sp),
TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp),
TokenTree::token(token::Ident(sym::doc, false), span),
TokenTree::token(token::Eq, span),
TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), span),
]
.iter()
.cloned()
@ -338,12 +333,12 @@ impl TokenCursor {
delim_span,
token::NoDelim,
if attr_style == AttrStyle::Inner {
[TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body]
[TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body]
.iter()
.cloned()
.collect::<TokenStream>()
} else {
[TokenTree::token(token::Pound, sp), body]
[TokenTree::token(token::Pound, span), body]
.iter()
.cloned()
.collect::<TokenStream>()
@ -351,7 +346,7 @@ impl TokenCursor {
),
));
self.next()
self.next(/* desugar_doc_comments */ false)
}
}
@ -436,9 +431,9 @@ impl<'a> Parser<'a> {
desugar_doc_comments: bool,
subparser_name: Option<&'static str>,
) -> Self {
let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
start_frame.open_delim = true;
start_frame.close_delim = true;
// Note: because of the way `TokenCursor::inlined_next` is structured, the `span` and
// `delim` arguments here are never used.
let start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
let mut parser = Parser {
sess,
@ -476,33 +471,6 @@ impl<'a> Parser<'a> {
parser
}
/// Pulls the next token and its spacing from `self.token_cursor`.
///
/// Uses `inlined_next_desugared` when `self.desugar_doc_comments` is set and `inlined_next`
/// otherwise. Open/close tokens for `NoDelim` delimiters are skipped rather than returned.
/// If the token carries a dummy span, the span is replaced by `fallback_span` while keeping
/// the token's own syntax context.
#[inline]
fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
loop {
let (mut next, spacing) = if self.desugar_doc_comments {
self.token_cursor.inlined_next_desugared()
} else {
self.token_cursor.inlined_next()
};
// Counted on every cursor call, including the `NoDelim` tokens skipped below.
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details.
self.token_cursor.break_last_token = false;
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = fallback_span.with_ctxt(next.span.ctxt());
}
// `NoDelim` open/close delimiters are never handed to the caller; fetch another token.
if matches!(
next.kind,
token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
) {
continue;
}
return (next, spacing);
}
}
pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
match self.expect_one_of(&[], &[]) {
Err(e) => Err(e),
@ -697,7 +665,7 @@ impl<'a> Parser<'a> {
//
// If we consume any additional tokens, then this token
// is not needed (we'll capture the entire 'glued' token),
// and `next_tok` will set this field to `None`
// and `bump` will set this field to `None`
self.token_cursor.break_last_token = true;
// Use the spacing of the glued token as the spacing
// of the unglued second token.
@ -1019,12 +987,6 @@ impl<'a> Parser<'a> {
/// This always-inlined version should only be used on hot code paths.
#[inline(always)]
fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
// Bumping after EOF is a bad sign, usually an infinite loop.
if self.prev_token.kind == TokenKind::Eof {
let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
self.span_bug(self.token.span, msg);
}
// Update the current and previous tokens.
self.prev_token = mem::replace(&mut self.token, next_token);
self.token_spacing = next_spacing;
@ -1035,8 +997,24 @@ impl<'a> Parser<'a> {
/// Advance the parser by one token.
pub fn bump(&mut self) {
let next_token = self.next_tok(self.token.span);
self.inlined_bump_with(next_token);
// Note: destructuring here would give nicer code, but it was found in #96210 to be slower
// than `.0`/`.1` access.
let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments);
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.token_cursor.break_last_token = false;
if next.0.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
let fallback_span = self.token.span;
next.0.span = fallback_span.with_ctxt(next.0.span.ctxt());
}
debug_assert!(!matches!(
next.0.kind,
token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
));
self.inlined_bump_with(next)
}
/// Look-ahead `dist` tokens of `self.token` and get access to that token there.
@ -1069,7 +1047,7 @@ impl<'a> Parser<'a> {
let mut i = 0;
let mut token = Token::dummy();
while i < dist {
token = cursor.next().0;
token = cursor.next(/* desugar_doc_comments */ false).0;
if matches!(
token.kind,
token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
@ -1217,24 +1195,28 @@ impl<'a> Parser<'a> {
pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
match self.token.kind {
token::OpenDelim(..) => {
let depth = self.token_cursor.stack.len();
// We keep advancing the token cursor until we hit
// the matching `CloseDelim` token.
while !(depth == self.token_cursor.stack.len()
&& matches!(self.token.kind, token::CloseDelim(_)))
{
// Advance one token at a time, so `TokenCursor::next()`
// can capture these tokens if necessary.
self.bump();
}
// We are still inside the frame corresponding
// to the delimited stream we captured, so grab
// the tokens from this frame.
// Grab the tokens from this frame.
let frame = &self.token_cursor.frame;
let stream = frame.tree_cursor.stream.clone();
let span = frame.span;
let delim = frame.delim;
// Advance the token cursor through the entire delimited
// sequence. After getting the `OpenDelim` we are *within* the
// delimited sequence, i.e. at depth `d`. After getting the
// matching `CloseDelim` we are *after* the delimited sequence,
// i.e. at depth `d - 1`.
let target_depth = self.token_cursor.stack.len() - 1;
loop {
// Advance one token at a time, so `TokenCursor::next()`
// can capture these tokens if necessary.
self.bump();
if self.token_cursor.stack.len() == target_depth {
debug_assert!(matches!(self.token.kind, token::CloseDelim(_)));
break;
}
}
// Consume close delimiter
self.bump();
TokenTree::Delimited(span, delim, stream)

View File

@ -11,8 +11,10 @@ use crate::parser::{FollowedByType, ForceCollect, NtOrTt, Parser, PathStyle};
impl<'a> Parser<'a> {
/// Checks whether a non-terminal may begin with a particular token.
///
/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that
/// token. Be conservative (return true) if not sure.
/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with
/// that token. Be conservative (return true) if not sure. Inlined because it has a single call
/// site.
#[inline]
pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool {
/// Checks whether the non-terminal may contain a single (non-keyword) identifier.
fn may_be_ident(nt: &token::Nonterminal) -> bool {
@ -95,7 +97,9 @@ impl<'a> Parser<'a> {
}
}
/// Parse a non-terminal (e.g. MBE `:pat` or `:ident`).
/// Parse a non-terminal (e.g. MBE `:pat` or `:ident`). Inlined because there is only one call
/// site.
#[inline]
pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, NtOrTt> {
// Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`)
// needs to have them force-captured here.