Rollup merge of #111009 - scottmcm:ascii-char, r=BurntSushi

Add `ascii::Char` (ACP#179)

ACP second: https://github.com/rust-lang/libs-team/issues/179#issuecomment-1527900570
New tracking issue: https://github.com/rust-lang/rust/issues/110998

For now this is an `enum` as `@kupiakos` [suggested](https://github.com/rust-lang/libs-team/issues/179#issuecomment-1527959724), with the variants under a different feature flag.

There are lots more things that could be added here, and there is room for further doc updates, but this seems like a plausible starting-point PR.

I've gone through and put an `as_ascii` next to every `is_ascii`: on `u8`, `char`, `[u8]`, and `str`.

As a demonstration, made a commit updating some formatting code to use this: https://github.com/scottmcm/rust/commit/ascii-char-in-fmt (I don't want to include that in this PR, though, because that brings in perf questions that don't exist if this is just adding new unstable APIs.)
This commit is contained in:
Matthias Krüger 2023-05-04 19:18:21 +02:00 committed by GitHub
commit ea0b6504fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 724 additions and 1 deletions

View File

@ -101,6 +101,7 @@
#![feature(array_into_iter_constructors)]
#![feature(array_methods)]
#![feature(array_windows)]
#![feature(ascii_char)]
#![feature(assert_matches)]
#![feature(async_iterator)]
#![feature(coerce_unsized)]

View File

@ -2526,6 +2526,15 @@ impl<T: fmt::Display + ?Sized> ToString for T {
}
}
#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "ascii_char", issue = "110998")]
impl ToString for core::ascii::Char {
    /// Produces an owned `String` containing just this ASCII character.
    #[inline]
    fn to_string(&self) -> String {
        // `as_str` already exposes the character as a `str`, so the text
        // only needs to be copied into a fresh `String`.
        String::from(self.as_str())
    }
}
#[cfg(not(no_global_oom_handling))]
#[stable(feature = "char_to_string_specialization", since = "1.46.0")]
impl ToString for char {

View File

@ -0,0 +1,34 @@
use crate::ascii;
#[cfg(not(test))]
impl<const N: usize> [u8; N] {
    /// Views this array of bytes as an array of ASCII characters,
    /// or returns `None` if any of the bytes is non-ASCII.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[must_use]
    #[inline]
    pub fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
        if !self.is_ascii() {
            return None;
        }
        // SAFETY: `is_ascii` just confirmed that every byte is ASCII.
        Some(unsafe { self.as_ascii_unchecked() })
    }

    /// Views this array of bytes as an array of ASCII characters,
    /// without checking whether the bytes are valid ASCII.
    ///
    /// # Safety
    ///
    /// Every byte in the array must be in `0..=127`, or else this is UB.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[must_use]
    #[inline]
    pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char; N] {
        // Reinterpret the array pointer; the length `N` is unchanged.
        let chars = self as *const [u8; N] as *const [ascii::Char; N];
        // SAFETY: The caller promised that all the bytes are ASCII.
        unsafe { &*chars }
    }
}

View File

@ -17,6 +17,7 @@ use crate::ops::{
};
use crate::slice::{Iter, IterMut};
mod ascii;
mod drain;
mod equality;
mod iter;

View File

@ -14,6 +14,10 @@ use crate::fmt;
use crate::iter::FusedIterator;
use crate::num::NonZeroUsize;
mod ascii_char;
#[unstable(feature = "ascii_char", issue = "110998")]
pub use ascii_char::AsciiChar as Char;
/// An iterator over the escaped version of a byte.
///
/// This `struct` is created by the [`escape_default`] function. See its

View File

@ -0,0 +1,565 @@
//! This uses the name `AsciiChar`, even though it's not exposed that way right now,
//! because it avoids a whole bunch of "are you sure you didn't mean `char`?"
//! suggestions from rustc if you get anything slightly wrong in here, and overall
//! helps with clarity as we're also referring to `char` intentionally in here.
use crate::fmt;
use crate::mem::transmute;
/// One of the 128 Unicode characters from U+0000 through U+007F,
/// often known as the [ASCII] subset.
///
/// Officially, this is the first [block] in Unicode, _Basic Latin_.
/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
///
/// This block was based on older 7-bit character code standards such as
/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
///
/// # When to use this
///
/// The main advantage of this subset is that it's always valid UTF-8. As such,
/// the `&[ascii::Char]` -> `&str` conversion function (as well as other related
/// ones) is O(1): *no* runtime checks are needed.
///
/// If you're consuming strings, you should usually handle Unicode and thus
/// accept `str`s, not limit yourself to `ascii::Char`s.
///
/// However, certain formats are intentionally designed to produce ASCII-only
/// output in order to be 8-bit-clean. In those cases, it can be simpler and
/// faster to generate `ascii::Char`s instead of dealing with the variable width
/// properties of general UTF-8 encoded strings, while still allowing the result
/// to be used freely with other Rust things that deal in general `str`s.
///
/// For example, a UUID library might offer a way to produce the string
/// representation of a UUID as an `[ascii::Char; 36]` to avoid memory
/// allocation yet still allow it to be used as UTF-8 via `as_str` without
/// paying for validation (or needing `unsafe` code) the way it would if it
/// were provided as a `[u8; 36]`.
///
/// # Layout
///
/// This type is guaranteed to have a size and alignment of 1 byte.
///
/// # Names
///
/// The variants on this type are [Unicode names][NamesList] of the characters
/// in upper camel case, with a few tweaks:
/// - For `<control>` characters, the primary alias name is used.
/// - `LATIN` is dropped, as this block has no non-Latin letters.
/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
///
/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
/// [block]: https://www.unicode.org/glossary/index.html#block
/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[unstable(feature = "ascii_char", issue = "110998")]
// Each variant's explicit discriminant below equals its code point, so the
// `u8` representation of a value is the ASCII byte itself.
#[repr(u8)]
pub enum AsciiChar {
// Every variant is gated behind the `ascii_char_variants` feature rather
// than the `ascii_char` feature that gates the type.

// Control characters, U+0000 through U+001F.
/// U+0000
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Null = 0,
/// U+0001
#[unstable(feature = "ascii_char_variants", issue = "110998")]
StartOfHeading = 1,
/// U+0002
#[unstable(feature = "ascii_char_variants", issue = "110998")]
StartOfText = 2,
/// U+0003
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfText = 3,
/// U+0004
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfTransmission = 4,
/// U+0005
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Enquiry = 5,
/// U+0006
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Acknowledge = 6,
/// U+0007
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Bell = 7,
/// U+0008
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Backspace = 8,
/// U+0009
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CharacterTabulation = 9,
/// U+000A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LineFeed = 10,
/// U+000B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LineTabulation = 11,
/// U+000C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
FormFeed = 12,
/// U+000D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CarriageReturn = 13,
/// U+000E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ShiftOut = 14,
/// U+000F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ShiftIn = 15,
/// U+0010
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DataLinkEscape = 16,
/// U+0011
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlOne = 17,
/// U+0012
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlTwo = 18,
/// U+0013
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlThree = 19,
/// U+0014
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlFour = 20,
/// U+0015
#[unstable(feature = "ascii_char_variants", issue = "110998")]
NegativeAcknowledge = 21,
/// U+0016
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SynchronousIdle = 22,
/// U+0017
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfTransmissionBlock = 23,
/// U+0018
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Cancel = 24,
/// U+0019
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfMedium = 25,
/// U+001A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Substitute = 26,
/// U+001B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Escape = 27,
/// U+001C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorFour = 28,
/// U+001D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorThree = 29,
/// U+001E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorTwo = 30,
/// U+001F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorOne = 31,

// Space and punctuation, U+0020 through U+002F.
/// U+0020
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Space = 32,
/// U+0021
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ExclamationMark = 33,
/// U+0022
#[unstable(feature = "ascii_char_variants", issue = "110998")]
QuotationMark = 34,
/// U+0023
#[unstable(feature = "ascii_char_variants", issue = "110998")]
NumberSign = 35,
/// U+0024
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DollarSign = 36,
/// U+0025
#[unstable(feature = "ascii_char_variants", issue = "110998")]
PercentSign = 37,
/// U+0026
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Ampersand = 38,
/// U+0027
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Apostrophe = 39,
/// U+0028
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LeftParenthesis = 40,
/// U+0029
#[unstable(feature = "ascii_char_variants", issue = "110998")]
RightParenthesis = 41,
/// U+002A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Asterisk = 42,
/// U+002B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
PlusSign = 43,
/// U+002C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Comma = 44,
/// U+002D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
HyphenMinus = 45,
/// U+002E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
FullStop = 46,
/// U+002F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Solidus = 47,

// Decimal digits, U+0030 through U+0039.
/// U+0030
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit0 = 48,
/// U+0031
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit1 = 49,
/// U+0032
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit2 = 50,
/// U+0033
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit3 = 51,
/// U+0034
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit4 = 52,
/// U+0035
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit5 = 53,
/// U+0036
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit6 = 54,
/// U+0037
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit7 = 55,
/// U+0038
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit8 = 56,
/// U+0039
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit9 = 57,

// Punctuation and symbols, U+003A through U+0040.
/// U+003A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Colon = 58,
/// U+003B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Semicolon = 59,
/// U+003C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LessThanSign = 60,
/// U+003D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EqualsSign = 61,
/// U+003E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
GreaterThanSign = 62,
/// U+003F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
QuestionMark = 63,
/// U+0040
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CommercialAt = 64,

// Capital letters, U+0041 through U+005A.
/// U+0041
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalA = 65,
/// U+0042
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalB = 66,
/// U+0043
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalC = 67,
/// U+0044
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalD = 68,
/// U+0045
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalE = 69,
/// U+0046
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalF = 70,
/// U+0047
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalG = 71,
/// U+0048
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalH = 72,
/// U+0049
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalI = 73,
/// U+004A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalJ = 74,
/// U+004B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalK = 75,
/// U+004C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalL = 76,
/// U+004D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalM = 77,
/// U+004E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalN = 78,
/// U+004F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalO = 79,
/// U+0050
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalP = 80,
/// U+0051
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalQ = 81,
/// U+0052
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalR = 82,
/// U+0053
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalS = 83,
/// U+0054
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalT = 84,
/// U+0055
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalU = 85,
/// U+0056
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalV = 86,
/// U+0057
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalW = 87,
/// U+0058
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalX = 88,
/// U+0059
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalY = 89,
/// U+005A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalZ = 90,

// Punctuation and symbols, U+005B through U+0060.
/// U+005B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LeftSquareBracket = 91,
/// U+005C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ReverseSolidus = 92,
/// U+005D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
RightSquareBracket = 93,
/// U+005E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CircumflexAccent = 94,
/// U+005F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LowLine = 95,
/// U+0060
#[unstable(feature = "ascii_char_variants", issue = "110998")]
GraveAccent = 96,

// Small letters, U+0061 through U+007A.
/// U+0061
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallA = 97,
/// U+0062
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallB = 98,
/// U+0063
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallC = 99,
/// U+0064
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallD = 100,
/// U+0065
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallE = 101,
/// U+0066
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallF = 102,
/// U+0067
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallG = 103,
/// U+0068
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallH = 104,
/// U+0069
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallI = 105,
/// U+006A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallJ = 106,
/// U+006B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallK = 107,
/// U+006C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallL = 108,
/// U+006D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallM = 109,
/// U+006E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallN = 110,
/// U+006F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallO = 111,
/// U+0070
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallP = 112,
/// U+0071
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallQ = 113,
/// U+0072
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallR = 114,
/// U+0073
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallS = 115,
/// U+0074
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallT = 116,
/// U+0075
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallU = 117,
/// U+0076
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallV = 118,
/// U+0077
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallW = 119,
/// U+0078
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallX = 120,
/// U+0079
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallY = 121,
/// U+007A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallZ = 122,

// Punctuation and symbols, U+007B through U+007E, then Delete at U+007F.
/// U+007B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LeftCurlyBracket = 123,
/// U+007C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
VerticalLine = 124,
/// U+007D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
RightCurlyBracket = 125,
/// U+007E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Tilde = 126,
/// U+007F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Delete = 127,
}
impl AsciiChar {
    /// Creates an ASCII character from the byte `b`,
    /// or returns `None` if `b` is greater than 127.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn from_u8(b: u8) -> Option<Self> {
        match b {
            // SAFETY: This arm only matches bytes in the valid `0..=127` range.
            0..=127 => Some(unsafe { Self::from_u8_unchecked(b) }),
            _ => None,
        }
    }

    /// Creates an ASCII character from the byte `b`,
    /// without checking whether it's valid.
    ///
    /// # Safety
    ///
    /// `b` must be in `0..=127`, or else this is UB.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
        // SAFETY: Our safety precondition is that `b` is in-range, and every
        // value in `0..=127` is the discriminant of a variant of this enum.
        unsafe { transmute::<u8, Self>(b) }
    }

    /// Maps the *number* `0`, `1`, …, `9` to the *character*
    /// `'0'`, `'1'`, …, `'9'` respectively.
    ///
    /// Returns `None` if `d >= 10`.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn digit(d: u8) -> Option<Self> {
        if d >= 10 {
            return None;
        }
        // SAFETY: `d` is known to be less than 10 at this point.
        Some(unsafe { Self::digit_unchecked(d) })
    }

    /// Maps the *number* `0`, `1`, …, `9` to the *character*
    /// `'0'`, `'1'`, …, `'9'` respectively, without checking that the
    /// number is in-range.
    ///
    /// # Safety
    ///
    /// This is immediate UB if called with `d > 64`.
    ///
    /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
    /// Notably, it should not be expected to return hex digits, or any other
    /// reasonable extension of the decimal digits.
    ///
    /// (This loose safety condition is intended to simplify soundness proofs
    /// when writing code using this method, since the implementation doesn't
    /// need something really specific, not to make those other arguments do
    /// something useful. It might be tightened before stabilization.)
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const unsafe fn digit_unchecked(d: u8) -> Self {
        debug_assert!(d < 10);
        // SAFETY: `'0'` through `'9'` are U+0030 through U+0039,
        // so because `d` must be 64 or less the addition can return at most
        // 112 (0x70), which doesn't overflow and is within the ASCII range.
        unsafe { Self::from_u8_unchecked(b'0'.unchecked_add(d)) }
    }

    /// Returns the byte value of this ASCII character.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_u8(self) -> u8 {
        self as u8
    }

    /// Returns this ASCII character as a `char` Unicode Scalar Value.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_char(self) -> char {
        self.as_u8() as char
    }

    /// Views this ASCII character as a one-code-unit UTF-8 `str`.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_str(&self) -> &str {
        // Treat the single character as a one-element slice, then reuse the
        // slice-to-`str` view.
        let single: &[Self] = crate::slice::from_ref(self);
        single.as_str()
    }
}
impl [AsciiChar] {
    /// Views this slice of ASCII characters as a UTF-8 `str`.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_str(&self) -> &str {
        // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
        // code unit having the same value as the ASCII byte.
        unsafe { &*(self as *const Self as *const str) }
    }

    /// Views this slice of ASCII characters as a slice of `u8` bytes.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        let text = self.as_str();
        text.as_bytes()
    }
}
#[unstable(feature = "ascii_char", issue = "110998")]
impl fmt::Display for AsciiChar {
    /// Formats this character by delegating to the `Display` impl of `str`
    /// on its `as_str` view.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.as_str();
        fmt::Display::fmt(text, f)
    }
}

View File

@ -1,5 +1,6 @@
//! impl char {}
use crate::ascii;
use crate::slice;
use crate::str::from_utf8_unchecked_mut;
use crate::unicode::printable::is_printable;
@ -1101,6 +1102,24 @@ impl char {
*self as u32 <= 0x7F
}
/// Converts this `char` to an [`ascii::Char`] if it is within the ASCII
/// range, or returns `None` if it's not.
///
/// This is preferred to [`Self::is_ascii`] when you're passing the value
/// along to something else that can take [`ascii::Char`] rather than
/// needing to check again for itself whether the value is in ASCII.
#[must_use]
#[unstable(feature = "ascii_char", issue = "110998")]
#[inline]
pub const fn as_ascii(&self) -> Option<ascii::Char> {
    if !self.is_ascii() {
        return None;
    }
    let byte = *self as u8;
    // SAFETY: `is_ascii` just confirmed that this is ASCII.
    Some(unsafe { ascii::Char::from_u8_unchecked(byte) })
}
/// Makes a copy of the value in its ASCII upper case equivalent.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',

View File

@ -472,7 +472,16 @@ impl u8 {
#[rustc_const_stable(feature = "const_u8_is_ascii", since = "1.43.0")]
#[inline]
pub const fn is_ascii(&self) -> bool {
*self & 128 == 0
*self <= 127
}
/// Returns this byte as an [ASCII character](ascii::Char) when its value
/// is within the ASCII range. Otherwise, returns `None`.
#[must_use]
#[unstable(feature = "ascii_char", issue = "110998")]
#[inline]
pub const fn as_ascii(&self) -> Option<ascii::Char> {
    let byte: u8 = *self;
    ascii::Char::from_u8(byte)
}
/// Makes a copy of the value in its ASCII upper case equivalent.

View File

@ -16,6 +16,36 @@ impl [u8] {
is_ascii(self)
}
/// Returns this slice as a slice of [ASCII characters](`ascii::Char`) if it
/// [`is_ascii`](Self::is_ascii), otherwise returns `None`.
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
    if !self.is_ascii() {
        return None;
    }
    // SAFETY: `is_ascii` just confirmed that every byte is ASCII.
    Some(unsafe { self.as_ascii_unchecked() })
}
/// Views this slice of bytes as a slice of ASCII characters,
/// without checking whether the bytes are valid ASCII.
///
/// # Safety
///
/// Every byte in the slice must be in `0..=127`, or else this is UB.
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char] {
    // Reinterpret the slice pointer; the length metadata carries over.
    let chars = self as *const [u8] as *const [ascii::Char];
    // SAFETY: The caller promised that all the bytes are ASCII.
    unsafe { &*chars }
}
/// Checks that two slices are an ASCII case-insensitive match.
///
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,

View File

@ -16,6 +16,7 @@ mod validations;
use self::pattern::Pattern;
use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
use crate::ascii;
use crate::char::{self, EscapeDebugExtArgs};
use crate::mem;
use crate::slice::{self, SliceIndex};
@ -2366,6 +2367,16 @@ impl str {
self.as_bytes().is_ascii()
}
/// Returns this string slice as a slice of [ASCII characters](`ascii::Char`)
/// if it [`is_ascii`](Self::is_ascii), otherwise returns `None`.
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
    // As with `is_ascii`, the check can run on the underlying bytes.
    let bytes = self.as_bytes();
    bytes.as_ascii()
}
/// Checks that two strings are an ASCII case-insensitive match.
///
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,

View File

@ -16,6 +16,9 @@
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::ascii::{escape_default, EscapeDefault};
#[unstable(feature = "ascii_char", issue = "110998")]
pub use core::ascii::Char;
/// Extension methods for ASCII-subset only operations.
///
/// Be aware that operations on seemingly non-ASCII characters can sometimes

View File

@ -0,0 +1,37 @@
// compile-flags: -C opt-level=1
// ignore-debug (the extra assertions get in the way)
#![crate_type = "lib"]
#![feature(ascii_char)]
use std::ascii::Char as AsciiChar;
// Codegen test: `digit(..).unwrap()` on a value already reduced modulo 10.
// The FileCheck directives below expect only a remainder, a truncation and an
// `or` with 48 in the emitted IR, with no comparison and no panic call.
// CHECK-LABEL: i8 @unwrap_digit_from_remainder(i32
#[no_mangle]
pub fn unwrap_digit_from_remainder(v: u32) -> AsciiChar {
// CHECK-NOT: icmp
// CHECK-NOT: panic
// CHECK: %[[R:.+]] = urem i32 %v, 10
// CHECK-NEXT: %[[T:.+]] = trunc i32 %[[R]] to i8
// CHECK-NEXT: %[[D:.+]] = or i8 %[[T]], 48
// CHECK-NEXT: ret i8 %[[D]]
// CHECK-NOT: icmp
// CHECK-NOT: panic
// `v % 10` is always below 10, so `digit` returns `Some` here.
AsciiChar::digit((v % 10) as u8).unwrap()
}
// Codegen test: `from_u8(..).unwrap()` on a byte masked to its low seven bits.
// The FileCheck directives below expect only the `and` with 127 followed by
// the return in the emitted IR, with no comparison and no panic call.
// CHECK-LABEL: i8 @unwrap_from_masked(i8
#[no_mangle]
pub fn unwrap_from_masked(b: u8) -> AsciiChar {
// CHECK-NOT: icmp
// CHECK-NOT: panic
// CHECK: %[[M:.+]] = and i8 %b, 127
// CHECK-NEXT: ret i8 %[[M]]
// CHECK-NOT: icmp
// CHECK-NOT: panic
// `b & 0x7f` is at most 127, so `from_u8` returns `Some` here.
AsciiChar::from_u8(b & 0x7f).unwrap()
}