Auto merge of #121150 - Swatinem:debug-ascii-str, r=joboet

Add a fast-path to `Debug` ASCII `&str`

Instead of going through the `EscapeDebug` machinery, we can just skip over ASCII chars that don’t need any escaping.

---

This is an alternative / a companion to https://github.com/rust-lang/rust/pull/121138.

The other PR adds the fast path deep within `EscapeDebug`, whereas this one skips over ASCII that needs no escaping as early as possible, directly in the `Debug` impl for `str`.
This commit is contained in:
bors 2024-05-24 12:23:00 +00:00
commit 213ad10c8f
2 changed files with 45 additions and 22 deletions

View File

@ -44,7 +44,7 @@ fn ascii_escapes(b: &mut Bencher) {
assert_fmt(
s,
r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#,
21,
15,
);
b.iter(|| {
black_box(format!("{:?}", black_box(s)));
@ -72,7 +72,7 @@ fn mostly_unicode(b: &mut Bencher) {
#[bench]
fn mixed(b: &mut Bencher) {
let s = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\".";
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 36);
assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 21);
b.iter(|| {
black_box(format!("{:?}", black_box(s)));
});

View File

@ -2402,23 +2402,47 @@ impl Display for bool {
impl Debug for str {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('"')?;
let mut from = 0;
for (i, c) in self.char_indices() {
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
// If char needs escaping, flush backlog so far and write, else skip
if esc.len() != 1 {
f.write_str(&self[from..i])?;
for c in esc {
f.write_char(c)?;
}
from = i + c.len_utf8();
}
// substring we know is printable
let mut printable_range = 0..0;
fn needs_escape(b: u8) -> bool {
b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
}
f.write_str(&self[from..])?;
// the loop here first skips over runs of printable ASCII as a fast path.
// other chars (unicode, or ASCII that needs escaping) are then handled per-`char`.
let mut rest = self;
while rest.len() > 0 {
let Some(non_printable_start) = rest.as_bytes().iter().position(|&b| needs_escape(b))
else {
printable_range.end += rest.len();
break;
};
printable_range.end += non_printable_start;
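// SAFETY: `position` returned an index into `rest`'s bytes, so it is within bounds. Every byte before it
// is printable ASCII (`needs_escape` is true for all bytes above 0x7E, i.e. for every byte of a multi-byte char), so the index is also at a char boundary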
rest = unsafe { rest.get_unchecked(non_printable_start..) };
let mut chars = rest.chars();
if let Some(c) = chars.next() {
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
if esc.len() != 1 {
f.write_str(&self[printable_range.clone()])?;
Display::fmt(&esc, f)?;
printable_range.start = printable_range.end + c.len_utf8();
}
printable_range.end += c.len_utf8();
}
rest = chars.as_str();
}
f.write_str(&self[printable_range])?;
f.write_char('"')
}
}
@ -2434,13 +2458,12 @@ impl Display for str {
impl Debug for char {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('\'')?;
for c in self.escape_debug_ext(EscapeDebugExtArgs {
let esc = self.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: true,
escape_double_quote: false,
}) {
f.write_char(c)?
}
});
Display::fmt(&esc, f)?;
f.write_char('\'')
}
}