chore: Simplify code for UTF-8 decoding
This commit is contained in:
parent
df4e56f9dd
commit
ae5e2be8a3
|
@ -126,46 +126,40 @@ impl<'a> DecodeAvmUtf8<'a> {
|
||||||
impl<'a> Iterator for DecodeAvmUtf8<'a> {
|
impl<'a> Iterator for DecodeAvmUtf8<'a> {
|
||||||
type Item = u32;
|
type Item = u32;
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
let mut ch: u32;
|
|
||||||
let first = *self.src.get(self.index)?;
|
let first = *self.src.get(self.index)?;
|
||||||
let ones = first.leading_ones();
|
let ones = first.leading_ones();
|
||||||
|
self.index += 1;
|
||||||
|
|
||||||
if ones <= 1 {
|
if ones <= 1 {
|
||||||
self.index += 1;
|
return Some(first as u32);
|
||||||
Some(first as u32)
|
}
|
||||||
} else {
|
|
||||||
let mb_count = core::cmp::min(ones - 1, 3);
|
let mb_count = core::cmp::min(ones - 1, 3);
|
||||||
let bm = u8::MAX >> ones;
|
let bm = u8::MAX >> ones;
|
||||||
ch = (bm & first) as u32;
|
let mut ch = (bm & first) as u32;
|
||||||
match self
|
match self
|
||||||
.src
|
.src
|
||||||
.get(self.index + 1..)
|
.get(self.index..)
|
||||||
.and_then(|src| src.get(..mb_count as usize))
|
.and_then(|src| src.get(..mb_count as usize))
|
||||||
{
|
{
|
||||||
Some(mb) => {
|
Some(mb) => {
|
||||||
for b in mb.iter() {
|
for b in mb.iter() {
|
||||||
// continuation bytes should start with a single leading 1
|
// continuation bytes should start with a single leading 1
|
||||||
if b.leading_ones() != 1 {
|
if b.leading_ones() != 1 {
|
||||||
self.index += 1;
|
return Some(first as u32);
|
||||||
return Some(first as u32);
|
|
||||||
}
|
|
||||||
ch <<= 6;
|
|
||||||
ch |= (*b & (u8::MAX >> 2)) as u32;
|
|
||||||
}
|
|
||||||
if ch <= 128 {
|
|
||||||
self.index += 1;
|
|
||||||
Some(first as u32)
|
|
||||||
} else {
|
|
||||||
self.index += mb_count as usize + 1;
|
|
||||||
debug_assert!(ch <= 0x10FFFF);
|
|
||||||
Some(ch)
|
|
||||||
}
|
}
|
||||||
|
ch <<= 6;
|
||||||
|
ch |= (*b & (u8::MAX >> 2)) as u32;
|
||||||
}
|
}
|
||||||
None => {
|
if ch <= 128 {
|
||||||
self.index += 1;
|
|
||||||
Some(first as u32)
|
Some(first as u32)
|
||||||
|
} else {
|
||||||
|
self.index += mb_count as usize;
|
||||||
|
debug_assert!(ch <= 0x10FFFF);
|
||||||
|
Some(ch)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
None => Some(first as u32),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue