Fix incorrect span when using byte-escaped rbrace

Fix #103826.
This commit is contained in:
Cassaundra Smith 2022-11-21 20:36:11 -08:00
parent 83356b78c4
commit 35c7939ae5
No known key found for this signature in database
GPG Key ID: E8087B0F3D674D9B
3 changed files with 108 additions and 49 deletions

View File

@ -38,6 +38,24 @@ impl InnerSpan {
}
}
/// The location and before/after width of a character whose width has changed from its source code
/// representation
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct InnerWidthMapping {
/// Index of the character in the source
pub position: usize,
/// The inner width in characters
pub before: usize,
/// The transformed width in characters
pub after: usize,
}
impl InnerWidthMapping {
pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
InnerWidthMapping { position, before, after }
}
}
/// The type of format string that we are parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseMode {
@ -200,8 +218,8 @@ pub struct Parser<'a> {
style: Option<usize>,
/// Start and end byte offset of every successfully parsed argument
pub arg_places: Vec<InnerSpan>,
/// Characters that need to be shifted
skips: Vec<usize>,
/// Characters whose length has been changed from their in-code representation
width_map: Vec<InnerWidthMapping>,
/// Span of the last opening brace seen, used for error reporting
last_opening_brace: Option<InnerSpan>,
/// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> {
'{' => {
let curr_last_brace = self.last_opening_brace;
let byte_pos = self.to_span_index(pos);
let lbrace_end = self.to_span_index(pos + 1);
let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
self.last_opening_brace = Some(byte_pos.to(lbrace_end));
self.cur.next();
if self.consume('{') {
@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> {
Some(String(self.string(pos + 1)))
} else {
let arg = self.argument(lbrace_end);
if let Some(rbrace_byte_idx) = self.must_consume('}') {
let lbrace_inner_offset = self.to_span_index(pos);
let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx);
if let Some(rbrace_pos) = self.must_consume('}') {
if self.is_literal {
let lbrace_byte_pos = self.to_span_index(pos);
let rbrace_byte_pos = self.to_span_index(rbrace_pos);
let width = self.to_span_width(rbrace_pos);
self.arg_places.push(
lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)),
lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
);
}
} else {
@ -285,7 +306,7 @@ impl<'a> Parser<'a> {
append_newline: bool,
mode: ParseMode,
) -> Parser<'a> {
let (skips, is_literal) = find_skips_from_snippet(snippet, style);
let (width_map, is_literal) = find_width_map_from_snippet(snippet, style);
Parser {
mode,
input: s,
@ -294,7 +315,7 @@ impl<'a> Parser<'a> {
curarg: 0,
style,
arg_places: vec![],
skips,
width_map,
last_opening_brace: None,
append_newline,
is_literal,
@ -367,21 +388,34 @@ impl<'a> Parser<'a> {
None
}
fn to_span_index(&self, pos: usize) -> InnerOffset {
let mut pos = pos;
// This handles the raw string case, the raw argument is the number of #
// in r###"..."### (we need to add one because of the `r`).
let raw = self.style.map_or(0, |raw| raw + 1);
for skip in &self.skips {
if pos > *skip {
pos += 1;
} else if pos == *skip && raw == 0 {
pos += 1;
fn remap_pos(&self, mut pos: usize) -> InnerOffset {
for width in &self.width_map {
if pos > width.position {
pos += width.before - width.after;
} else if pos == width.position && width.after == 0 {
pos += width.before;
} else {
break;
}
}
InnerOffset(raw + pos + 1)
InnerOffset(pos)
}
fn to_span_index(&self, pos: usize) -> InnerOffset {
// This handles the raw string case, the raw argument is the number of #
// in r###"..."### (we need to add one because of the `r`).
let raw = self.style.map_or(0, |raw| raw + 1);
let pos = self.remap_pos(pos);
InnerOffset(raw + pos.0 + 1)
}
fn to_span_width(&self, pos: usize) -> usize {
let pos = self.remap_pos(pos);
match self.width_map.iter().find(|w| w.position == pos.0) {
Some(w) => w.before,
None => 1,
}
}
fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
@ -809,10 +843,10 @@ impl<'a> Parser<'a> {
/// Finds the indices of all characters that have been processed and differ between the actual
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
fn find_skips_from_snippet(
fn find_width_map_from_snippet(
snippet: Option<string::String>,
str_style: Option<usize>,
) -> (Vec<usize>, bool) {
) -> (Vec<InnerWidthMapping>, bool) {
let snippet = match snippet {
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
_ => return (vec![], false),
@ -825,43 +859,39 @@ fn find_skips_from_snippet(
let snippet = &snippet[1..snippet.len() - 1];
let mut s = snippet.char_indices();
let mut skips = vec![];
let mut width_mappings = vec![];
while let Some((pos, c)) = s.next() {
match (c, s.clone().next()) {
// skip whitespace and empty lines ending in '\\'
('\\', Some((next_pos, '\n'))) => {
skips.push(pos);
skips.push(next_pos);
('\\', Some((_, '\n'))) => {
let _ = s.next();
let mut width = 2;
while let Some((pos, c)) = s.clone().next() {
while let Some((_, c)) = s.clone().next() {
if matches!(c, ' ' | '\n' | '\t') {
skips.push(pos);
width += 1;
let _ = s.next();
} else {
break;
}
}
width_mappings.push(InnerWidthMapping::new(pos, width, 0));
}
('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
skips.push(next_pos);
('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
let _ = s.next();
}
('\\', Some((_, 'x'))) => {
for _ in 0..3 {
// consume `\xAB` literal
if let Some((pos, _)) = s.next() {
skips.push(pos);
} else {
break;
}
}
// consume `\xAB` literal
s.nth(2);
width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
}
('\\', Some((_, 'u'))) => {
if let Some((pos, _)) = s.next() {
skips.push(pos);
}
if let Some((next_pos, next_c)) = s.next() {
let mut width = 2;
let _ = s.next();
if let Some((_, next_c)) = s.next() {
if next_c == '{' {
// consume up to 6 hexanumeric chars
let digits_len =
@ -881,19 +911,18 @@ fn find_skips_from_snippet(
let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
// skip '{' and '}' also
for pos in (next_pos..).take(required_skips + 2) {
skips.push(pos)
}
width += required_skips + 2;
s.nth(digits_len);
} else if next_c.is_digit(16) {
skips.push(next_pos);
width += 1;
// We suggest adding `{` and `}` when appropriate, accept it here as if
// it were correct
let mut i = 0; // consume up to 6 hexanumeric chars
while let (Some((next_pos, c)), _) = (s.next(), i < 6) {
while let (Some((_, c)), _) = (s.next(), i < 6) {
if c.is_digit(16) {
skips.push(next_pos);
width += 1;
} else {
break;
}
@ -901,11 +930,13 @@ fn find_skips_from_snippet(
}
}
}
width_mappings.push(InnerWidthMapping::new(pos, width, 1));
}
_ => {}
}
}
(skips, true)
(width_mappings, true)
}
#[cfg(test)]

View File

@ -0,0 +1,8 @@
fn main() {
format!("{\x7D");
//~^ ERROR 1 positional argument in format string, but no arguments were given
format!("\x7B\x7D");
//~^ ERROR 1 positional argument in format string, but no arguments were given
format!("{\x7D {\x7D");
//~^ ERROR 2 positional arguments in format string, but no arguments were given
}

View File

@ -0,0 +1,20 @@
error: 1 positional argument in format string, but no arguments were given
--> $DIR/issue-103826.rs:2:14
|
LL | format!("{\x7D");
| ^^^^^
error: 1 positional argument in format string, but no arguments were given
--> $DIR/issue-103826.rs:4:14
|
LL | format!("\x7B\x7D");
| ^^^^^^^^
error: 2 positional arguments in format string, but no arguments were given
--> $DIR/issue-103826.rs:6:14
|
LL | format!("{\x7D {\x7D");
| ^^^^^ ^^^^^
error: aborting due to 3 previous errors