chore: Simplify code for UTF-8 decoding

This commit is contained in:
EmperorBale 2022-07-17 21:41:25 -07:00 committed by Mike Welsh
parent df4e56f9dd
commit ae5e2be8a3
1 changed files with 25 additions and 31 deletions

View File

@ -126,46 +126,40 @@ impl<'a> DecodeAvmUtf8<'a> {
impl<'a> Iterator for DecodeAvmUtf8<'a> {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
let mut ch: u32;
let first = *self.src.get(self.index)?;
let ones = first.leading_ones();
self.index += 1;
if ones <= 1 {
self.index += 1;
Some(first as u32)
} else {
let mb_count = core::cmp::min(ones - 1, 3);
let bm = u8::MAX >> ones;
ch = (bm & first) as u32;
match self
.src
.get(self.index + 1..)
.and_then(|src| src.get(..mb_count as usize))
{
Some(mb) => {
for b in mb.iter() {
// continuation bytes should start with a single leading 1
if b.leading_ones() != 1 {
self.index += 1;
return Some(first as u32);
}
ch <<= 6;
ch |= (*b & (u8::MAX >> 2)) as u32;
}
if ch <= 128 {
self.index += 1;
Some(first as u32)
} else {
self.index += mb_count as usize + 1;
debug_assert!(ch <= 0x10FFFF);
Some(ch)
return Some(first as u32);
}
let mb_count = core::cmp::min(ones - 1, 3);
let bm = u8::MAX >> ones;
let mut ch = (bm & first) as u32;
match self
.src
.get(self.index..)
.and_then(|src| src.get(..mb_count as usize))
{
Some(mb) => {
for b in mb.iter() {
// continuation bytes should start with a single leading 1
if b.leading_ones() != 1 {
return Some(first as u32);
}
ch <<= 6;
ch |= (*b & (u8::MAX >> 2)) as u32;
}
None => {
self.index += 1;
if ch <= 128 {
Some(first as u32)
} else {
self.index += mb_count as usize;
debug_assert!(ch <= 0x10FFFF);
Some(ch)
}
}
None => Some(first as u32),
}
}
}