chore: Simplify code for UTF-8 decoding

This commit is contained in:
EmperorBale 2022-07-17 21:41:25 -07:00 committed by Mike Welsh
parent df4e56f9dd
commit ae5e2be8a3
1 changed file with 25 additions and 31 deletions

View File

@ -126,46 +126,40 @@ impl<'a> DecodeAvmUtf8<'a> {
impl<'a> Iterator for DecodeAvmUtf8<'a> { impl<'a> Iterator for DecodeAvmUtf8<'a> {
type Item = u32; type Item = u32;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let mut ch: u32;
let first = *self.src.get(self.index)?; let first = *self.src.get(self.index)?;
let ones = first.leading_ones(); let ones = first.leading_ones();
self.index += 1;
if ones <= 1 { if ones <= 1 {
self.index += 1; return Some(first as u32);
Some(first as u32) }
} else {
let mb_count = core::cmp::min(ones - 1, 3); let mb_count = core::cmp::min(ones - 1, 3);
let bm = u8::MAX >> ones; let bm = u8::MAX >> ones;
ch = (bm & first) as u32; let mut ch = (bm & first) as u32;
match self match self
.src .src
.get(self.index + 1..) .get(self.index..)
.and_then(|src| src.get(..mb_count as usize)) .and_then(|src| src.get(..mb_count as usize))
{ {
Some(mb) => { Some(mb) => {
for b in mb.iter() { for b in mb.iter() {
// continuation bytes should start with a single leading 1 // continuation bytes should start with a single leading 1
if b.leading_ones() != 1 { if b.leading_ones() != 1 {
self.index += 1; return Some(first as u32);
return Some(first as u32);
}
ch <<= 6;
ch |= (*b & (u8::MAX >> 2)) as u32;
}
if ch <= 128 {
self.index += 1;
Some(first as u32)
} else {
self.index += mb_count as usize + 1;
debug_assert!(ch <= 0x10FFFF);
Some(ch)
} }
ch <<= 6;
ch |= (*b & (u8::MAX >> 2)) as u32;
} }
None => { if ch <= 128 {
self.index += 1;
Some(first as u32) Some(first as u32)
} else {
self.index += mb_count as usize;
debug_assert!(ch <= 0x10FFFF);
Some(ch)
} }
} }
None => Some(first as u32),
} }
} }
} }