Remove `TokenStreamBuilder`.

`TokenStreamBuilder` exists to concatenate multiple `TokenStream`s
together. This commit removes it, and moves the concatenation
functionality directly into `TokenStream`, via two new methods
`push_tree` and `push_stream`. This makes things both simpler and
faster.
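
As a rough, self-contained sketch of the new shape (simplified stand-in
types using plain `Rc`/`Vec`, with the token-gluing logic omitted; these
are not the real `rustc_ast` definitions):

    use std::rc::Rc; // stands in for rustc's `Lrc` in this sketch

    #[derive(Clone, Debug, PartialEq)]
    enum TokenTree {
        Token(&'static str),
    }

    #[derive(Clone, Default, Debug)]
    struct TokenStream(Rc<Vec<TokenTree>>);

    impl TokenStream {
        /// Append a single token tree directly onto the end of this stream.
        fn push_tree(&mut self, tt: TokenTree) {
            Rc::make_mut(&mut self.0).push(tt);
        }

        /// Append every token tree of `stream` onto the end of this stream.
        fn push_stream(&mut self, stream: TokenStream) {
            Rc::make_mut(&mut self.0).extend(stream.0.iter().cloned());
        }
    }

    fn main() {
        let mut stream = TokenStream::default();
        stream.push_tree(TokenTree::Token("foo"));
        stream.push_stream(TokenStream(Rc::new(vec![
            TokenTree::Token("::"),
            TokenTree::Token("bar"),
        ])));
        assert_eq!(stream.0.len(), 3);
    }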

`push_tree` is particularly important. `TokenStreamBuilder` had only a
single `push` method, which pushed a whole stream. But in practice we most
often push a single token tree rather than a stream, and `push_tree`
avoids the need to build a token stream with a single entry (which
requires two allocations, one for the `Lrc` and one for the `Vec`).
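
The allocation difference is easy to see in miniature (an analogy only,
with `u32` elements standing in for token trees; this is not the real
code):

    use std::rc::Rc;

    fn main() {
        let mut dest: Vec<u32> = Vec::new();

        // Builder-era shape: appending a single element meant first wrapping
        // it in its own refcounted vector, i.e. one `Rc` (`Lrc`) allocation
        // plus one `Vec` allocation, only to discard both immediately.
        let single: Rc<Vec<u32>> = Rc::new(vec![42]);
        dest.extend(single.iter().copied());

        // `push_tree`-style shape: the element goes straight into the
        // destination vector, with only amortized growth of `dest` itself.
        dest.push(42);

        assert_eq!(dest, vec![42, 42]);
    }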

The main `push_tree` use arises from a change to one of the `ToInternal`
impls in `proc_macro_server.rs`. It now returns a `SmallVec` instead of
a `TokenStream`. This return value is then iterated over by
`concat_trees`, which does `push_tree` on each element. Furthermore, the
use of `SmallVec` avoids further allocations, because there are only ever
one or two token trees.
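
Schematically, the new `concat_trees` shape looks something like this (a
sketch with simplified stand-in types, assuming the `smallvec` crate is
available; the real code lives in `proc_macro_server.rs` and
`tokenstream.rs`):

    use smallvec::{smallvec, SmallVec};
    use std::rc::Rc; // stands in for rustc's `Lrc` in this sketch

    #[derive(Clone, Debug)]
    enum TokenTree {
        Token(String),
    }

    #[derive(Clone, Default, Debug)]
    struct TokenStream(Rc<Vec<TokenTree>>);

    impl TokenStream {
        fn push_tree(&mut self, tt: TokenTree) {
            Rc::make_mut(&mut self.0).push(tt);
        }
    }

    // Stand-in for the `ToInternal` conversion: most proc-macro trees map to
    // exactly one internal tree, and a negative literal maps to two (the `-`
    // plus the number), hence the inline capacity of 2.
    fn to_internal(text: &str) -> SmallVec<[TokenTree; 2]> {
        match text.strip_prefix('-') {
            Some(rest) => smallvec![
                TokenTree::Token("-".to_string()),
                TokenTree::Token(rest.to_string())
            ],
            None => smallvec![TokenTree::Token(text.to_string())],
        }
    }

    // Rough shape of the server's `concat_trees`: push each converted tree
    // directly, with no intermediate streams and no builder.
    fn concat_trees(base: Option<TokenStream>, trees: Vec<&str>) -> TokenStream {
        let mut stream = base.unwrap_or_default();
        for tree in trees {
            for tt in to_internal(tree) {
                stream.push_tree(tt);
            }
        }
        stream
    }

    fn main() {
        let out = concat_trees(None, vec!["foo", "-1"]);
        assert_eq!(out.0.len(), 3);
    }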

Note: the removed `TokenStreamBuilder::push` method had some code to
deal with a quadratic blowup case from #57735. This commit removes the
code. I tried and failed to reproduce the blowup from that PR, before
and after this change. Various other changes have happened to
`TokenStreamBuilder` in the meantime, so I suspect the original problem
is no longer relevant, though I don't have proof of this. Generally
speaking, repeatedly extending a `Vec` without pre-determining its
capacity is *not* quadratic. It's also incredibly common within rustc and
many other Rust programs, so if it caused performance problems you'd expect
them to show up in other places, too.
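
A quick way to see the amortized-growth point: capacity grows
geometrically, so n small appends trigger only O(log n) reallocations.

    fn main() {
        let mut v: Vec<u32> = Vec::new();
        let mut reallocations = 0;
        let mut last_capacity = v.capacity();

        // 10,000 small `extend` calls with no `reserve` up front.
        for chunk in 0..10_000u32 {
            v.extend(std::iter::repeat(chunk).take(3));
            if v.capacity() != last_capacity {
                reallocations += 1;
                last_capacity = v.capacity();
            }
        }

        // 30,000 elements end up in `v`, but the capacity changed only a
        // logarithmic number of times, so total copying is O(n), not O(n^2).
        println!("len = {}, reallocations = {}", v.len(), reallocations);
        assert!(reallocations < 32);
    }
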
Nicholas Nethercote 2022-10-05 10:38:15 +11:00
parent 1e8dc45fb5
commit 1e848a564b
3 changed files with 82 additions and 110 deletions


@@ -245,12 +245,12 @@ impl AttrTokenStream {
                     // properly implemented - we always synthesize fake tokens,
                     // so we never reach this code.
-                    let mut builder = TokenStreamBuilder::new();
+                    let mut stream = TokenStream::default();
                     for inner_attr in inner_attrs {
-                        builder.push(inner_attr.tokens());
+                        stream.push_stream(inner_attr.tokens());
                     }
-                    builder.push(delim_tokens.clone());
-                    *tree = TokenTree::Delimited(*span, *delim, builder.build());
+                    stream.push_stream(delim_tokens.clone());
+                    *tree = TokenTree::Delimited(*span, *delim, stream);
                     found = true;
                     break;
                 }
@@ -505,76 +505,49 @@ impl TokenStream {
         self.trees().map(|tree| TokenStream::flatten_token_tree(tree)).collect()
     }
-}
-// 99.5%+ of the time we have 1 or 2 elements in this vector.
-#[derive(Clone)]
-pub struct TokenStreamBuilder(SmallVec<[TokenStream; 2]>);
-impl TokenStreamBuilder {
-    pub fn new() -> TokenStreamBuilder {
-        TokenStreamBuilder(SmallVec::new())
+    // If `vec` is not empty, try to glue `tt` onto its last token. The return
+    // value indicates if gluing took place.
+    fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
+        if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = vec.last()
+            && let TokenTree::Token(tok, spacing) = tt
+            && let Some(glued_tok) = last_tok.glue(&tok)
+        {
+            // ...then overwrite the last token tree in `vec` with the
+            // glued token, and skip the first token tree from `stream`.
+            *vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
+            true
+        } else {
+            false
+        }
     }
-    pub fn push(&mut self, stream: TokenStream) {
-        self.0.push(stream);
+    // Push `tt` onto the end of the stream, possibly gluing it to the last
+    // token. Uses `make_mut` to maximize efficiency.
+    pub fn push_tree(&mut self, tt: TokenTree) {
+        let vec_mut = Lrc::make_mut(&mut self.0);
+        if Self::try_glue_to_last(vec_mut, &tt) {
+            // nothing else to do
+        } else {
+            vec_mut.push(tt);
+        }
     }
-    pub fn build(self) -> TokenStream {
-        let mut streams = self.0;
-        match streams.len() {
-            0 => TokenStream::default(),
-            1 => streams.pop().unwrap(),
-            _ => {
-                // We will extend the first stream in `streams` with the
-                // elements from the subsequent streams. This requires using
-                // `make_mut()` on the first stream, and in practice this
-                // doesn't cause cloning 99.9% of the time.
-                //
-                // One very common use case is when `streams` has two elements,
-                // where the first stream has any number of elements within
-                // (often 1, but sometimes many more) and the second stream has
-                // a single element within.
+    // Push `stream` onto the end of the stream, possibly gluing the first
+    // token tree to the last token. (No other token trees will be glued.)
+    // Uses `make_mut` to maximize efficiency.
+    pub fn push_stream(&mut self, stream: TokenStream) {
+        let vec_mut = Lrc::make_mut(&mut self.0);
-                // Determine how much the first stream will be extended.
-                // Needed to avoid quadratic blow up from on-the-fly
-                // reallocations (#57735).
-                let num_appends = streams.iter().skip(1).map(|ts| ts.len()).sum();
+        let stream_iter = stream.0.iter().cloned();
-                // Get the first stream, which will become the result stream.
-                // If it's `None`, create an empty stream.
-                let mut iter = streams.into_iter();
-                let mut res_stream_lrc = iter.next().unwrap().0;
-                // Append the subsequent elements to the result stream, after
-                // reserving space for them.
-                let res_vec_mut = Lrc::make_mut(&mut res_stream_lrc);
-                res_vec_mut.reserve(num_appends);
-                for stream in iter {
-                    let stream_iter = stream.0.iter().cloned();
-                    // If (a) `res_mut_vec` is not empty and the last tree
-                    // within it is a token tree marked with `Joint`, and (b)
-                    // `stream` is not empty and the first tree within it is a
-                    // token tree, and (c) the two tokens can be glued
-                    // together...
-                    if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = res_vec_mut.last()
-                        && let Some(TokenTree::Token(tok, spacing)) = stream.0.first()
-                        && let Some(glued_tok) = last_tok.glue(&tok)
-                    {
-                        // ...then overwrite the last token tree in
-                        // `res_vec_mut` with the glued token, and skip the
-                        // first token tree from `stream`.
-                        *res_vec_mut.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
-                        res_vec_mut.extend(stream_iter.skip(1));
-                    } else {
-                        // Append all of `stream`.
-                        res_vec_mut.extend(stream_iter);
-                    }
-                }
-                TokenStream(res_stream_lrc)
-            }
+        if let Some(first) = stream.0.first() && Self::try_glue_to_last(vec_mut, first) {
+            // Now skip the first token tree from `stream`.
+            vec_mut.extend(stream_iter.skip(1));
+        } else {
+            // Append all of `stream`.
+            vec_mut.extend(stream_iter);
         }
     }
 }


@@ -1,5 +1,8 @@
 use crate::base::ExtCtxt;
+use pm::bridge::{
+    server, DelimSpan, Diagnostic, ExpnGlobals, Group, Ident, LitKind, Literal, Punct, TokenTree,
+};
+use pm::{Delimiter, Level, LineColumn};
 use rustc_ast as ast;
 use rustc_ast::token;
 use rustc_ast::tokenstream::{self, Spacing::*, TokenStream};
@@ -13,11 +16,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::def_id::CrateNum;
 use rustc_span::symbol::{self, sym, Symbol};
 use rustc_span::{BytePos, FileName, Pos, SourceFile, Span};
-use pm::bridge::{
-    server, DelimSpan, Diagnostic, ExpnGlobals, Group, Ident, LitKind, Literal, Punct, TokenTree,
-};
-use pm::{Delimiter, Level, LineColumn};
 use smallvec::{smallvec, SmallVec};
 use std::ops::Bound;
 trait FromInternal<T> {
@@ -241,8 +240,11 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre
     }
 }
-impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rustc<'_, '_>) {
-    fn to_internal(self) -> TokenStream {
+// We use a `SmallVec` because the output size is always one or two `TokenTree`s.
+impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
+    for (TokenTree<TokenStream, Span, Symbol>, &mut Rustc<'_, '_>)
+{
+    fn to_internal(self) -> SmallVec<[tokenstream::TokenTree; 2]> {
         use rustc_ast::token::*;
         let (tree, rustc) = self;
@@ -273,22 +275,22 @@ impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rus
                     b'\'' => SingleQuote,
                     _ => unreachable!(),
                 };
-                if joint {
-                    tokenstream::TokenStream::token_joint(kind, span)
+                smallvec![if joint {
+                    tokenstream::TokenTree::token_joint(kind, span)
                 } else {
-                    tokenstream::TokenStream::token_alone(kind, span)
-                }
+                    tokenstream::TokenTree::token_alone(kind, span)
+                }]
             }
             TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => {
-                tokenstream::TokenStream::delimited(
+                smallvec![tokenstream::TokenTree::Delimited(
                     tokenstream::DelimSpan { open, close },
                     delimiter.to_internal(),
                     stream.unwrap_or_default(),
-                )
+                )]
             }
             TokenTree::Ident(self::Ident { sym, is_raw, span }) => {
                 rustc.sess().symbol_gallery.insert(sym, span);
-                tokenstream::TokenStream::token_alone(Ident(sym, is_raw), span)
+                smallvec![tokenstream::TokenTree::token_alone(Ident(sym, is_raw), span)]
             }
             TokenTree::Literal(self::Literal {
                 kind: self::LitKind::Integer,
@@ -301,7 +303,7 @@ impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rus
                 let integer = TokenKind::lit(token::Integer, symbol, suffix);
                 let a = tokenstream::TokenTree::token_alone(minus, span);
                 let b = tokenstream::TokenTree::token_alone(integer, span);
-                [a, b].into_iter().collect()
+                smallvec![a, b]
             }
             TokenTree::Literal(self::Literal {
                 kind: self::LitKind::Float,
@@ -314,13 +316,13 @@ impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rus
                 let float = TokenKind::lit(token::Float, symbol, suffix);
                 let a = tokenstream::TokenTree::token_alone(minus, span);
                 let b = tokenstream::TokenTree::token_alone(float, span);
-                [a, b].into_iter().collect()
+                smallvec![a, b]
            }
             TokenTree::Literal(self::Literal { kind, symbol, suffix, span }) => {
-                tokenstream::TokenStream::token_alone(
+                smallvec![tokenstream::TokenTree::token_alone(
                     TokenKind::lit(kind.to_internal(), symbol, suffix),
                     span,
-                )
+                )]
             }
         }
     }
@@ -536,7 +538,7 @@ impl server::TokenStream for Rustc<'_, '_> {
         &mut self,
         tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol>,
     ) -> Self::TokenStream {
-        (tree, &mut *self).to_internal()
+        Self::TokenStream::new((tree, &mut *self).to_internal().into_iter().collect::<Vec<_>>())
     }
     fn concat_trees(
@@ -544,14 +546,14 @@ impl server::TokenStream for Rustc<'_, '_> {
         base: Option<Self::TokenStream>,
         trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol>>,
     ) -> Self::TokenStream {
-        let mut builder = tokenstream::TokenStreamBuilder::new();
-        if let Some(base) = base {
-            builder.push(base);
-        }
+        let mut stream =
+            if let Some(base) = base { base } else { tokenstream::TokenStream::default() };
         for tree in trees {
-            builder.push((tree, &mut *self).to_internal());
+            for tt in (tree, &mut *self).to_internal() {
+                stream.push_tree(tt);
+            }
         }
-        builder.build()
+        stream
     }
     fn concat_streams(
@@ -559,14 +561,12 @@ impl server::TokenStream for Rustc<'_, '_> {
         base: Option<Self::TokenStream>,
         streams: Vec<Self::TokenStream>,
     ) -> Self::TokenStream {
-        let mut builder = tokenstream::TokenStreamBuilder::new();
-        if let Some(base) = base {
-            builder.push(base);
+        let mut stream =
+            if let Some(base) = base { base } else { tokenstream::TokenStream::default() };
+        for s in streams {
+            stream.push_stream(s);
         }
-        for stream in streams {
-            builder.push(stream);
-        }
-        builder.build()
+        stream
     }
     fn into_trees(
@@ -692,6 +692,7 @@ impl server::Span for Rustc<'_, '_> {
     fn source_text(&mut self, span: Self::Span) -> Option<String> {
         self.sess().source_map().span_to_snippet(span).ok()
     }
     /// Saves the provided span into the metadata of
     /// *the crate we are currently compiling*, which must
     /// be a proc-macro crate. This id can be passed to


@@ -1,7 +1,7 @@
 use crate::tests::string_to_stream;
 use rustc_ast::token;
-use rustc_ast::tokenstream::{TokenStream, TokenStreamBuilder};
+use rustc_ast::tokenstream::{TokenStream, TokenTree};
 use rustc_span::create_default_session_globals_then;
 use rustc_span::{BytePos, Span, Symbol};
@@ -19,10 +19,9 @@ fn test_concat() {
         let test_res = string_to_ts("foo::bar::baz");
         let test_fst = string_to_ts("foo::bar");
         let test_snd = string_to_ts("::baz");
-        let mut builder = TokenStreamBuilder::new();
-        builder.push(test_fst);
-        builder.push(test_snd);
-        let eq_res = builder.build();
+        let mut eq_res = TokenStream::default();
+        eq_res.push_stream(test_fst);
+        eq_res.push_stream(test_snd);
         assert_eq!(test_res.trees().count(), 5);
         assert_eq!(eq_res.trees().count(), 5);
         assert_eq!(test_res.eq_unspanned(&eq_res), true);
@@ -99,11 +98,10 @@ fn test_is_empty() {
 #[test]
 fn test_dotdotdot() {
     create_default_session_globals_then(|| {
-        let mut builder = TokenStreamBuilder::new();
-        builder.push(TokenStream::token_joint(token::Dot, sp(0, 1)));
-        builder.push(TokenStream::token_joint(token::Dot, sp(1, 2)));
-        builder.push(TokenStream::token_alone(token::Dot, sp(2, 3)));
-        let stream = builder.build();
+        let mut stream = TokenStream::default();
+        stream.push_tree(TokenTree::token_joint(token::Dot, sp(0, 1)));
+        stream.push_tree(TokenTree::token_joint(token::Dot, sp(1, 2)));
+        stream.push_tree(TokenTree::token_alone(token::Dot, sp(2, 3)));
         assert!(stream.eq_unspanned(&string_to_ts("...")));
         assert_eq!(stream.trees().count(), 1);
     })