rollup merge of #20391: daramos/utf8_lossy

Prior to 9bae6ec828, `from_utf8_lossy` had a minor optimization in place that avoided having to loop from the beginning of the input slice.
Recently, 4908017d59 implemented `Utf8Error::InvalidByte`, which makes this optimization possible again.
This commit is contained in:
Alex Crichton 2015-01-02 09:22:42 -08:00
commit c5b9ffdee6
1 changed file with 7 additions and 3 deletions

View File

@ -143,14 +143,18 @@ impl String {
/// ```
#[stable]
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
let mut i = 0;
match str::from_utf8(v) {
Ok(s) => return Cow::Borrowed(s),
Err(..) => {}
Err(e) => {
if let Utf8Error::InvalidByte(firstbad) = e {
i = firstbad;
}
}
}
static TAG_CONT_U8: u8 = 128u8;
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
let mut i = 0;
let total = v.len();
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
unsafe { *xs.get_unchecked(i) }
@ -174,7 +178,7 @@ impl String {
// subseqidx is the index of the first byte of the subsequence we're looking at.
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
// them one by one.
let mut subseqidx = 0;
let mut subseqidx = i;
while i < total {
let i_ = i;