Rollup merge of #127233 - nnethercote:parser-cleanups, r=petrochenkov

Some parser cleanups

Cleanups I made while looking closely at this code.

r? ``@petrochenkov``
Matthias Krüger 2024-07-03 17:26:55 +02:00 committed by GitHub
commit 7fdb2f5cab
8 changed files with 97 additions and 119 deletions

View File

@@ -204,12 +204,14 @@ impl Attribute {
     pub fn tokens(&self) -> TokenStream {
         match &self.kind {
-            AttrKind::Normal(normal) => normal
-                .tokens
-                .as_ref()
-                .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}"))
-                .to_attr_token_stream()
-                .to_tokenstream(),
+            AttrKind::Normal(normal) => TokenStream::new(
+                normal
+                    .tokens
+                    .as_ref()
+                    .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}"))
+                    .to_attr_token_stream()
+                    .to_token_trees(),
+            ),
             &AttrKind::DocComment(comment_kind, data) => TokenStream::token_alone(
                 token::DocComment(comment_kind, self.style, data),
                 self.span,

View File

@@ -23,7 +23,6 @@ use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::{Decodable, Encodable, HashStable_Generic};
 use rustc_serialize::{Decodable, Encodable};
 use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};
-use smallvec::{smallvec, SmallVec};
 use std::borrow::Cow;
 use std::{cmp, fmt, iter};
@@ -180,27 +179,25 @@ impl AttrTokenStream {
         AttrTokenStream(Lrc::new(tokens))
     }
 
-    /// Converts this `AttrTokenStream` to a plain `TokenStream`.
+    /// Converts this `AttrTokenStream` to a plain `Vec<TokenTree>`.
     /// During conversion, `AttrTokenTree::Attributes` get 'flattened'
     /// back to a `TokenStream` of the form `outer_attr attr_target`.
     /// If there are inner attributes, they are inserted into the proper
     /// place in the attribute target tokens.
-    pub fn to_tokenstream(&self) -> TokenStream {
-        let trees: Vec<_> = self
-            .0
-            .iter()
-            .flat_map(|tree| match &tree {
+    pub fn to_token_trees(&self) -> Vec<TokenTree> {
+        let mut res = Vec::with_capacity(self.0.len());
+        for tree in self.0.iter() {
+            match tree {
                 AttrTokenTree::Token(inner, spacing) => {
-                    smallvec![TokenTree::Token(inner.clone(), *spacing)].into_iter()
+                    res.push(TokenTree::Token(inner.clone(), *spacing));
                 }
                 AttrTokenTree::Delimited(span, spacing, delim, stream) => {
-                    smallvec![TokenTree::Delimited(
+                    res.push(TokenTree::Delimited(
                         *span,
                         *spacing,
                         *delim,
-                        stream.to_tokenstream()
-                    ),]
-                    .into_iter()
+                        TokenStream::new(stream.to_token_trees()),
+                    ))
                 }
                 AttrTokenTree::Attributes(data) => {
                     let idx = data
@@ -208,14 +205,7 @@ impl AttrTokenStream {
                         .partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer));
                     let (outer_attrs, inner_attrs) = data.attrs.split_at(idx);
 
-                    let mut target_tokens: Vec<_> = data
-                        .tokens
-                        .to_attr_token_stream()
-                        .to_tokenstream()
-                        .0
-                        .iter()
-                        .cloned()
-                        .collect();
+                    let mut target_tokens = data.tokens.to_attr_token_stream().to_token_trees();
                     if !inner_attrs.is_empty() {
                         let mut found = false;
                         // Check the last two trees (to account for a trailing semi)
@@ -251,17 +241,14 @@ impl AttrTokenStream {
                             "Failed to find trailing delimited group in: {target_tokens:?}"
                         );
                     }
-                    let mut flat: SmallVec<[_; 1]> =
-                        SmallVec::with_capacity(target_tokens.len() + outer_attrs.len());
                     for attr in outer_attrs {
-                        flat.extend(attr.tokens().0.iter().cloned());
+                        res.extend(attr.tokens().0.iter().cloned());
                     }
-                    flat.extend(target_tokens);
-                    flat.into_iter()
+                    res.extend(target_tokens);
                 }
-            })
-            .collect();
-        TokenStream::new(trees)
+            }
+        }
+        res
     }
 }
@@ -409,8 +396,8 @@ impl PartialEq<TokenStream> for TokenStream {
 }
 
 impl TokenStream {
-    pub fn new(streams: Vec<TokenTree>) -> TokenStream {
-        TokenStream(Lrc::new(streams))
+    pub fn new(tts: Vec<TokenTree>) -> TokenStream {
+        TokenStream(Lrc::new(tts))
     }
 
     pub fn is_empty(&self) -> bool {
@@ -461,7 +448,7 @@ impl TokenStream {
                 AttributesData { attrs: attrs.iter().cloned().collect(), tokens: tokens.clone() };
             AttrTokenStream::new(vec![AttrTokenTree::Attributes(attr_data)])
         };
-        attr_stream.to_tokenstream()
+        TokenStream::new(attr_stream.to_token_trees())
     }
 
     pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
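A note on the flattening rewrite above: `to_token_trees` relies on the attribute list storing all outer attributes before all inner ones, which is what lets `partition_point` find the split with a binary search rather than a scan. A minimal sketch of that split, using simplified stand-in types rather than rustc's real `Attribute`:

    // Simplified stand-ins for rustc_ast's Attribute/AttrStyle.
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum AttrStyle {
        Outer,
        Inner,
    }

    #[derive(Debug)]
    struct Attr {
        style: AttrStyle,
        name: &'static str,
    }

    fn main() {
        // Outer attributes are stored before inner ones, so
        // `partition_point` finds the boundary in O(log n).
        let attrs = vec![
            Attr { style: AttrStyle::Outer, name: "derive" },
            Attr { style: AttrStyle::Outer, name: "cfg" },
            Attr { style: AttrStyle::Inner, name: "doc" },
        ];
        let idx = attrs.partition_point(|attr| matches!(attr.style, AttrStyle::Outer));
        let (outer_attrs, inner_attrs) = attrs.split_at(idx);
        assert_eq!((outer_attrs.len(), inner_attrs.len()), (2, 1));
        println!("outer: {outer_attrs:?}, inner: {inner_attrs:?}");
    }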

View File

@@ -38,16 +38,14 @@ pub(crate) fn cfg_eval(
     lint_node_id: NodeId,
 ) -> Annotatable {
     let features = Some(features);
-    CfgEval { cfg: &mut StripUnconfigured { sess, features, config_tokens: true, lint_node_id } }
+    CfgEval(StripUnconfigured { sess, features, config_tokens: true, lint_node_id })
         .configure_annotatable(annotatable)
         // Since the item itself has already been configured by the `InvocationCollector`,
         // we know that fold result vector will contain exactly one element.
         .unwrap()
 }
 
-struct CfgEval<'a, 'b> {
-    cfg: &'a mut StripUnconfigured<'b>,
-}
+struct CfgEval<'a>(StripUnconfigured<'a>);
 
 fn flat_map_annotatable(
     vis: &mut impl MutVisitor,
@@ -125,9 +123,9 @@ fn has_cfg_or_cfg_attr(annotatable: &Annotatable) -> bool {
     res.is_break()
 }
 
-impl CfgEval<'_, '_> {
+impl CfgEval<'_> {
     fn configure<T: HasAttrs + HasTokens>(&mut self, node: T) -> Option<T> {
-        self.cfg.configure(node)
+        self.0.configure(node)
     }
 
     fn configure_annotatable(&mut self, mut annotatable: Annotatable) -> Option<Annotatable> {
@@ -196,7 +194,7 @@ impl CfgEval<'_, '_> {
         // Re-parse the tokens, setting the `capture_cfg` flag to save extra information
         // to the captured `AttrTokenStream` (specifically, we capture
         // `AttrTokenTree::AttributesData` for all occurrences of `#[cfg]` and `#[cfg_attr]`)
-        let mut parser = Parser::new(&self.cfg.sess.psess, orig_tokens, None);
+        let mut parser = Parser::new(&self.0.sess.psess, orig_tokens, None);
         parser.capture_cfg = true;
         match parse_annotatable_with(&mut parser) {
             Ok(a) => annotatable = a,
@@ -212,16 +210,16 @@ impl CfgEval<'_, '_> {
     }
 }
 
-impl MutVisitor for CfgEval<'_, '_> {
+impl MutVisitor for CfgEval<'_> {
     #[instrument(level = "trace", skip(self))]
     fn visit_expr(&mut self, expr: &mut P<ast::Expr>) {
-        self.cfg.configure_expr(expr, false);
+        self.0.configure_expr(expr, false);
         mut_visit::noop_visit_expr(expr, self);
     }
 
     #[instrument(level = "trace", skip(self))]
     fn visit_method_receiver_expr(&mut self, expr: &mut P<ast::Expr>) {
-        self.cfg.configure_expr(expr, true);
+        self.0.configure_expr(expr, true);
         mut_visit::noop_visit_expr(expr, self);
     }

View File

@@ -120,21 +120,21 @@ struct CollectTrackerAndEmitter<'a, 'cx, 'matcher> {
 struct BestFailure {
     token: Token,
-    position_in_tokenstream: usize,
+    position_in_tokenstream: u32,
     msg: &'static str,
     remaining_matcher: MatcherLoc,
 }
 
 impl BestFailure {
-    fn is_better_position(&self, position: usize) -> bool {
+    fn is_better_position(&self, position: u32) -> bool {
         position > self.position_in_tokenstream
     }
 }
 
 impl<'a, 'cx, 'matcher> Tracker<'matcher> for CollectTrackerAndEmitter<'a, 'cx, 'matcher> {
-    type Failure = (Token, usize, &'static str);
+    type Failure = (Token, u32, &'static str);
 
-    fn build_failure(tok: Token, position: usize, msg: &'static str) -> Self::Failure {
+    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
         (tok, position, msg)
     }
@@ -211,9 +211,9 @@ impl<'matcher> FailureForwarder<'matcher> {
 }
 
 impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> {
-    type Failure = (Token, usize, &'static str);
+    type Failure = (Token, u32, &'static str);
 
-    fn build_failure(tok: Token, position: usize, msg: &'static str) -> Self::Failure {
+    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
         (tok, position, msg)
     }

View File

@@ -452,7 +452,7 @@ impl TtParser {
         &mut self,
         matcher: &'matcher [MatcherLoc],
         token: &Token,
-        approx_position: usize,
+        approx_position: u32,
         track: &mut T,
     ) -> Option<NamedParseResult<T::Failure>> {
         // Matcher positions that would be valid if the macro invocation was over now. Only

View File

@@ -153,7 +153,7 @@ pub(super) trait Tracker<'matcher> {
     /// Arm failed to match. If the token is `token::Eof`, it indicates an unexpected
     /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
     /// The usize is the approximate position of the token in the input token stream.
-    fn build_failure(tok: Token, position: usize, msg: &'static str) -> Self::Failure;
+    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure;
 
     /// This is called before trying to match next MatcherLoc on the current token.
     fn before_match_loc(&mut self, _parser: &TtParser, _matcher: &'matcher MatcherLoc) {}
@@ -182,7 +182,7 @@ pub(super) struct NoopTracker;
 impl<'matcher> Tracker<'matcher> for NoopTracker {
     type Failure = ();
 
-    fn build_failure(_tok: Token, _position: usize, _msg: &'static str) -> Self::Failure {}
+    fn build_failure(_tok: Token, _position: u32, _msg: &'static str) -> Self::Failure {}
 
     fn description() -> &'static str {
         "none"

View File

@@ -9,6 +9,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::{sym, Span, DUMMY_SP};
 use std::ops::Range;
+use std::{iter, mem};
 
 /// A wrapper type to ensure that the parser handles outer attributes correctly.
 /// When we parse outer attributes, we need to ensure that we capture tokens
@@ -29,15 +30,15 @@ pub struct AttrWrapper {
     // The start of the outer attributes in the token cursor.
     // This allows us to create a `ReplaceRange` for the entire attribute
     // target, including outer attributes.
-    start_pos: usize,
+    start_pos: u32,
 }
 
 impl AttrWrapper {
-    pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper {
+    pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
         AttrWrapper { attrs, start_pos }
     }
 
     pub fn empty() -> AttrWrapper {
-        AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
+        AttrWrapper { attrs: AttrVec::new(), start_pos: u32::MAX }
     }
 
     pub(crate) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
@@ -53,7 +54,7 @@ impl AttrWrapper {
     // FIXME: require passing an NT to prevent misuse of this method
     pub(crate) fn prepend_to_nt_inner(self, attrs: &mut AttrVec) {
         let mut self_attrs = self.attrs;
-        std::mem::swap(attrs, &mut self_attrs);
+        mem::swap(attrs, &mut self_attrs);
         attrs.extend(self_attrs);
     }
@@ -91,7 +92,7 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
 struct LazyAttrTokenStreamImpl {
     start_token: (Token, Spacing),
     cursor_snapshot: TokenCursor,
-    num_calls: usize,
+    num_calls: u32,
     break_last_token: bool,
     replace_ranges: Box<[ReplaceRange]>,
 }
@@ -104,15 +105,16 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
         // produce an empty `TokenStream` if no calls were made, and omit the
         // final token otherwise.
         let mut cursor_snapshot = self.cursor_snapshot.clone();
-        let tokens =
-            std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
-                .chain(std::iter::repeat_with(|| {
-                    let token = cursor_snapshot.next();
-                    (FlatToken::Token(token.0), token.1)
-                }))
-                .take(self.num_calls);
+        let tokens = iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
+            .chain(iter::repeat_with(|| {
+                let token = cursor_snapshot.next();
+                (FlatToken::Token(token.0), token.1)
+            }))
+            .take(self.num_calls as usize);
 
-        if !self.replace_ranges.is_empty() {
+        if self.replace_ranges.is_empty() {
+            make_attr_token_stream(tokens, self.break_last_token)
+        } else {
             let mut tokens: Vec<_> = tokens.collect();
             let mut replace_ranges = self.replace_ranges.to_vec();
             replace_ranges.sort_by_key(|(range, _)| range.start);
@@ -156,7 +158,7 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
                 // This keeps the total length of `tokens` constant throughout the
                 // replacement process, allowing us to use all of the `ReplaceRanges` entries
                 // without adjusting indices.
-                let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone))
+                let filler = iter::repeat((FlatToken::Empty, Spacing::Alone))
                     .take(range.len() - new_tokens.len());
 
                 tokens.splice(
@@ -164,9 +166,7 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
                     new_tokens.into_iter().chain(filler),
                 );
             }
-            make_token_stream(tokens.into_iter(), self.break_last_token)
-        } else {
-            make_token_stream(tokens, self.break_last_token)
+            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
         }
     }
 }
@@ -218,24 +218,23 @@ impl<'a> Parser<'a> {
         let start_token = (self.token.clone(), self.token_spacing);
         let cursor_snapshot = self.token_cursor.clone();
         let start_pos = self.num_bump_calls;
         let has_outer_attrs = !attrs.attrs.is_empty();
-        let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
         let replace_ranges_start = self.capture_state.replace_ranges.len();
-        let ret = f(self, attrs.attrs);
-        self.capture_state.capturing = prev_capturing;
-        let (mut ret, trailing) = ret?;
+        let (mut ret, trailing) = {
+            let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
+            let ret_and_trailing = f(self, attrs.attrs);
+            self.capture_state.capturing = prev_capturing;
+            ret_and_trailing?
+        };
 
         // When we're not in `capture-cfg` mode, then bail out early if:
         // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
         //    so there's nothing for us to do.
         // 2. Our target already has tokens set (e.g. we've parsed something
-        //    like `#[my_attr] $item`. The actual parsing code takes care of prepending
-        //    any attributes to the nonterminal, so we don't need to modify the
-        //    already captured tokens.
+        //    like `#[my_attr] $item`). The actual parsing code takes care of
+        //    prepending any attributes to the nonterminal, so we don't need to
+        //    modify the already captured tokens.
         // Note that this check is independent of `force_collect`- if we already
         // have tokens, or can't even store them, then there's never a need to
         // force collection of new tokens.
@@ -276,37 +275,32 @@ impl<'a> Parser<'a> {
         let replace_ranges_end = self.capture_state.replace_ranges.len();
 
-        let mut end_pos = self.num_bump_calls;
-        let mut captured_trailing = false;
         // Capture a trailing token if requested by the callback 'f'
-        match trailing {
-            TrailingToken::None => {}
+        let captured_trailing = match trailing {
+            TrailingToken::None => false,
             TrailingToken::Gt => {
                 assert_eq!(self.token.kind, token::Gt);
+                false
             }
             TrailingToken::Semi => {
                 assert_eq!(self.token.kind, token::Semi);
-                end_pos += 1;
-                captured_trailing = true;
+                true
            }
-            TrailingToken::MaybeComma => {
-                if self.token.kind == token::Comma {
-                    end_pos += 1;
-                    captured_trailing = true;
-                }
-            }
-        }
+            TrailingToken::MaybeComma => self.token.kind == token::Comma,
+        };
 
-        // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
-        // then extend the range of captured tokens to include it, since the parser
-        // was not actually bumped past it. When the `LazyAttrTokenStream` gets converted
-        // into an `AttrTokenStream`, we will create the proper token.
-        if self.break_last_token {
-            assert!(!captured_trailing, "Cannot set break_last_token and have trailing token");
-            end_pos += 1;
-        }
+        assert!(
+            !(self.break_last_token && captured_trailing),
+            "Cannot set break_last_token and have trailing token"
+        );
+
+        let end_pos = self.num_bump_calls
+            + captured_trailing as u32
+            // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens), then
+            // extend the range of captured tokens to include it, since the parser was not actually
+            // bumped past it. When the `LazyAttrTokenStream` gets converted into an
+            // `AttrTokenStream`, we will create the proper token.
+            + self.break_last_token as u32;
 
         let num_calls = end_pos - start_pos;
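The rewritten end-position computation above folds the old `end_pos += 1` branches into bool-to-integer casts, which Rust guarantees to be 1 for `true` and 0 for `false`. A minimal check of the arithmetic (illustrative values only):

    fn main() {
        let num_bump_calls: u32 = 10;
        let captured_trailing = true;
        let break_last_token = false;
        // `true as u32 == 1` and `false as u32 == 0`, so the two flags
        // extend the captured range by zero, one, or two positions.
        let end_pos = num_bump_calls + captured_trailing as u32 + break_last_token as u32;
        assert_eq!(end_pos, 11);
    }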
@@ -318,14 +312,11 @@ impl<'a> Parser<'a> {
             // Grab any replace ranges that occur *inside* the current AST node.
             // We will perform the actual replacement when we convert the `LazyAttrTokenStream`
             // to an `AttrTokenStream`.
-            let start_calls: u32 = start_pos.try_into().unwrap();
             self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
                 .iter()
                 .cloned()
                 .chain(inner_attr_replace_ranges.iter().cloned())
-                .map(|(range, tokens)| {
-                    ((range.start - start_calls)..(range.end - start_calls), tokens)
-                })
+                .map(|(range, tokens)| ((range.start - start_pos)..(range.end - start_pos), tokens))
                 .collect()
         };
@@ -340,7 +331,7 @@ impl<'a> Parser<'a> {
         // If we support tokens at all
         if let Some(target_tokens) = ret.tokens_mut() {
             if target_tokens.is_none() {
-                // Store se our newly captured tokens into the AST node
+                // Store our newly captured tokens into the AST node.
                 *target_tokens = Some(tokens.clone());
             }
         }
@@ -382,10 +373,10 @@ impl<'a> Parser<'a> {
     }
 }
 
-/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
-/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
-/// of open and close delims.
-fn make_token_stream(
+/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
+/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
+/// close delims.
+fn make_attr_token_stream(
     mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
     break_last_token: bool,
 ) -> AttrTokenStream {
@@ -464,6 +455,6 @@ mod size_asserts {
     use rustc_data_structures::static_assert_size;
     // tidy-alphabetical-start
     static_assert_size!(AttrWrapper, 16);
-    static_assert_size!(LazyAttrTokenStreamImpl, 104);
+    static_assert_size!(LazyAttrTokenStreamImpl, 96);
     // tidy-alphabetical-end
 }
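One detail behind the `ReplaceRange` machinery above: replacements are padded with `FlatToken::Empty` filler so the token vector's length never changes, keeping the indices of every later range valid. A tiny runnable sketch of that length-preserving splice (illustrative names, not rustc's):

    fn main() {
        let mut tokens = vec!["a", "b", "c", "d", "e"];
        let range = 1..4; // replace "b", "c", "d"
        let new_tokens = vec!["X"];

        // Pad the replacement to the same length as the range it covers.
        let filler = std::iter::repeat("<empty>").take(range.len() - new_tokens.len());
        tokens.splice(range, new_tokens.into_iter().chain(filler));

        // Length is unchanged, so later ranges still point at the right spots.
        assert_eq!(tokens, ["a", "X", "<empty>", "<empty>", "e"]);
    }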

View File

@@ -153,7 +153,7 @@ pub struct Parser<'a> {
     expected_tokens: Vec<TokenType>,
     token_cursor: TokenCursor,
     // The number of calls to `bump`, i.e. the position in the token stream.
-    num_bump_calls: usize,
+    num_bump_calls: u32,
     // During parsing we may sometimes need to 'unglue' a glued token into two
     // component tokens (e.g. '>>' into '>' and '>'), so the parser can consume
     // them one at a time. This process bypasses the normal capturing mechanism
@@ -192,7 +192,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(target_pointer_width = "64")]
-rustc_data_structures::static_assert_size!(Parser<'_>, 264);
+rustc_data_structures::static_assert_size!(Parser<'_>, 256);
 
 /// Stores span information about a closure.
 #[derive(Clone, Debug)]
@@ -1572,7 +1572,7 @@ impl<'a> Parser<'a> {
         self.expected_tokens.clear();
     }
 
-    pub fn approx_token_stream_pos(&self) -> usize {
+    pub fn approx_token_stream_pos(&self) -> u32 {
        self.num_bump_calls
     }
 }
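For context on the size assertions updated above (`Parser` from 264 to 256 bytes after narrowing `num_bump_calls`): `static_assert_size!` pins a type's size at compile time by equating two array lengths, so any accidental change fails the build until the number is updated. A sketch of the underlying trick (an illustrative reimplementation, not necessarily rustc's exact macro):

    // If the two lengths disagree, the constant fails to type-check.
    macro_rules! static_assert_size {
        ($ty:ty, $size:expr) => {
            const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()];
        };
    }

    #[allow(dead_code)]
    struct Example {
        a: u64,
        b: u32, // e.g. a former usize field narrowed to u32
    }

    static_assert_size!(Example, 16); // 12 bytes of fields, padded to 16

    fn main() {
        println!("Example is {} bytes", std::mem::size_of::<Example>());
    }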