wstr: improve `Cow<WStr>` support

This adds the missing `From` implementations and UTF-8 conversion functions
returning `Cow<WStr>`.
This commit is contained in:
Moulins 2023-04-05 19:15:48 +02:00 committed by Nathan Adams
parent ab0f9b9fc2
commit 83f7bfc0c2
2 changed files with 112 additions and 21 deletions

View File

@ -1,4 +1,4 @@
use alloc::borrow::ToOwned; use alloc::borrow::{Cow, ToOwned};
use alloc::string::String; use alloc::string::String;
use alloc::vec::Vec; use alloc::vec::Vec;
use core::fmt; use core::fmt;
@ -108,7 +108,24 @@ impl WString {
unsafe { Self::from_buf_unchecked(buf) } unsafe { Self::from_buf_unchecked(buf) }
} }
/// Creates a `WString` from a pre-existing `WStr`.
///
/// The units of `s` are copied into a new buffer of the same kind
/// (bytes stay bytes, wide units stay wide).
#[inline]
pub fn from_wstr(s: &WStr) -> Self {
    let copied = match s.units() {
        Units::Bytes(bytes) => Units::Bytes(bytes.to_vec()),
        Units::Wide(wide) => Units::Wide(wide.to_vec()),
    };
    Self::from_buf(copied)
}
/// Creates a `WString` from a UTF-8 `str`.
///
/// The input is split with `split_ascii_prefix` and both halves are
/// handed to `from_utf8_inner`, which builds the string.
#[inline]
pub fn from_utf8(s: &str) -> Self {
    let (prefix, rest) = split_ascii_prefix(s);
    Self::from_utf8_inner(prefix, rest)
}
/// Creates a `WString` from an UTF-8 `String`, reusing the allocation if possible. /// Creates a `WString` from an UTF-8 `String`, reusing the allocation if possible.
#[inline]
pub fn from_utf8_owned(s: String) -> Self { pub fn from_utf8_owned(s: String) -> Self {
let (ascii, tail) = split_ascii_prefix(&s); let (ascii, tail) = split_ascii_prefix(&s);
if tail.is_empty() { if tail.is_empty() {
@ -116,6 +133,23 @@ impl WString {
return Self::from_buf(s.into_bytes()); return Self::from_buf(s.into_bytes());
} }
Self::from_utf8_inner(ascii, tail)
}
/// Creates a `WString` from UTF-8 bytes, treating invalid sequences
/// as described in `DecodeAvmUtf8`, and reusing the allocation if possible.
///
/// When nothing follows the ASCII prefix, `b` itself becomes the buffer;
/// otherwise the work is delegated to `from_utf8_bytes_inner`.
#[inline]
pub fn from_utf8_bytes(b: Vec<u8>) -> Self {
    let (prefix, rest) = split_ascii_prefix_bytes(&b);
    if rest.is_empty() {
        // We can directly reinterpret ASCII bytes as LATIN1.
        Self::from_buf(b)
    } else {
        Self::from_utf8_bytes_inner(prefix, rest)
    }
}
pub(crate) fn from_utf8_inner(ascii: &[u8], tail: &str) -> Self {
let is_wide = tail.find(|ch| ch > u8::MAX.into()).is_some(); let is_wide = tail.find(|ch| ch > u8::MAX.into()).is_some();
if is_wide { if is_wide {
let mut buf = Vec::new(); let mut buf = Vec::new();
@ -130,22 +164,8 @@ impl WString {
} }
} }
/// Creates a `WString` from an UTF-8 `str`. pub(crate) fn from_utf8_bytes_inner(ascii: &str, tail: &[u8]) -> Self {
#[inline]
pub fn from_utf8(s: &str) -> Self {
let mut buf = Self::new();
buf.push_utf8(s);
buf
}
pub fn from_utf8_bytes(b: Vec<u8>) -> Self {
let (ascii, tail) = split_ascii_prefix_bytes(&b);
let ascii = ascii.as_bytes(); let ascii = ascii.as_bytes();
if tail.is_empty() {
// We can directly reinterpret ASCII bytes as LATIN1.
return Self::from_buf(b);
}
let is_wide = DecodeAvmUtf8::new(tail).any(|ch| ch > u8::MAX.into()); let is_wide = DecodeAvmUtf8::new(tail).any(|ch| ch > u8::MAX.into());
if is_wide { if is_wide {
let mut buf = Vec::new(); let mut buf = Vec::new();
@ -170,7 +190,7 @@ impl WString {
buf buf
} }
/// Creates a `StrBuf` from a single unicode character. /// Creates a `WString` from a single unicode character.
#[inline] #[inline]
pub fn from_char(c: char) -> Self { pub fn from_char(c: char) -> Self {
let mut buf = Self::new(); let mut buf = Self::new();
@ -342,7 +362,6 @@ impl WString {
/// This will convert this `WString` into its wide form if necessary. /// This will convert this `WString` into its wide form if necessary.
pub fn push_utf8(&mut self, s: &str) { pub fn push_utf8(&mut self, s: &str) {
let (ascii, tail) = split_ascii_prefix(s); let (ascii, tail) = split_ascii_prefix(s);
let is_wide = || tail.find(|ch| ch > u8::MAX.into()).is_some(); let is_wide = || tail.find(|ch| ch > u8::MAX.into()).is_some();
self.with_wide_buf_if(is_wide, |units| match units { self.with_wide_buf_if(is_wide, |units| match units {
@ -357,6 +376,29 @@ impl WString {
}); });
} }
/// Appends UTF-8 bytes to `self`; invalid sequences are handled
/// as described in `DecodeAvmUtf8`.
///
/// This will convert this `WString` into its wide form if necessary.
pub fn push_utf8_bytes(&mut self, utf8: &[u8]) {
    let (prefix, rest) = split_ascii_prefix_bytes(utf8);
    let prefix = prefix.as_bytes();

    // Reports whether any decoded character exceeds `u8::MAX`.
    let needs_wide = || DecodeAvmUtf8::new(rest).any(|ch| ch > u8::MAX.into());

    self.with_wide_buf_if(needs_wide, |units| match units {
        Units::Wide(wide) => {
            wide.extend(prefix.iter().map(|&b| u16::from(b)));
            for ch in DecodeAvmUtf8::new(rest) {
                encode_raw_utf16(ch, wide);
            }
        }
        Units::Bytes(bytes) => {
            bytes.extend_from_slice(prefix);
            bytes.extend(DecodeAvmUtf8::new(rest).map(|ch| ch as u8));
        }
    });
}
/// Appends another `WStr` to `self`. /// Appends another `WStr` to `self`.
/// ///
/// This will convert this `WString` into its wide form if necessary. /// This will convert this `WString` into its wide form if necessary.
@ -425,10 +467,9 @@ impl Clone for WString {
impl ToOwned for WStr { impl ToOwned for WStr {
type Owned = WString; type Owned = WString;
#[inline]
fn to_owned(&self) -> Self::Owned { fn to_owned(&self) -> Self::Owned {
let mut buf = WString::new(); WString::from_wstr(self)
buf.push_str(self);
buf
} }
fn clone_into(&self, target: &mut Self::Owned) { fn clone_into(&self, target: &mut Self::Owned) {
@ -437,6 +478,30 @@ impl ToOwned for WStr {
} }
} }
/// Wraps a borrowed `WStr` in a `Cow` without copying.
impl<'a> From<&'a WStr> for Cow<'a, WStr> {
    #[inline]
    fn from(s: &'a WStr) -> Self {
        Self::Borrowed(s)
    }
}
/// Moves an owned `WString` into the `Owned` variant of a `Cow`.
impl From<WString> for Cow<'_, WStr> {
    #[inline]
    fn from(s: WString) -> Self {
        Self::Owned(s)
    }
}
/// Extracts a `WString` from a `Cow`: an owned value is returned as-is,
/// a borrowed one is copied.
impl From<Cow<'_, WStr>> for WString {
    #[inline]
    fn from(s: Cow<'_, WStr>) -> Self {
        // `into_owned` goes through `WStr::to_owned` for the borrowed case.
        s.into_owned()
    }
}
impl Deref for WString { impl Deref for WString {
type Target = WStr; type Target = WStr;
#[inline] #[inline]

View File

@ -32,6 +32,7 @@ pub use parse::{FromWStr, Integer};
pub use pattern::Pattern; pub use pattern::Pattern;
pub use ptr::{WStr, MAX_STRING_LEN}; pub use ptr::{WStr, MAX_STRING_LEN};
use alloc::borrow::Cow;
use core::borrow::Borrow; use core::borrow::Borrow;
use common::panic_on_invalid_length; use common::panic_on_invalid_length;
@ -41,3 +42,28 @@ use common::panic_on_invalid_length;
pub fn join<E: Borrow<WStr>, S: Borrow<WStr>>(elems: &[E], sep: &S) -> WString { pub fn join<E: Borrow<WStr>, S: Borrow<WStr>>(elems: &[E], sep: &S) -> WString {
crate::ops::str_join(elems, sep.borrow()) crate::ops::str_join(elems, sep.borrow())
} }
/// Converts a borrowed UTF-8 string to a `Cow<WStr>`.
///
/// The input is borrowed when nothing follows its ASCII prefix; otherwise
/// an owned `WString` is built with `WString::from_utf8_inner`.
#[inline]
pub fn from_utf8(s: &str) -> Cow<'_, WStr> {
    let (prefix, rest) = utils::split_ascii_prefix(s);
    if !rest.is_empty() {
        return Cow::Owned(WString::from_utf8_inner(prefix, rest));
    }
    // We can directly reinterpret ASCII bytes as LATIN1.
    Cow::Borrowed(WStr::from_units(prefix))
}
/// Converts a slice of UTF-8 bytes to a `Cow<WStr>`.
///
/// The slice is borrowed when nothing follows its ASCII prefix; otherwise
/// an owned `WString` is built with `WString::from_utf8_bytes_inner`.
///
/// Invalid UTF-8 sequences are treated as described in `utils::DecodeAvmUtf8`.
pub fn from_utf8_bytes(bytes: &[u8]) -> Cow<'_, WStr> {
    let (prefix, rest) = utils::split_ascii_prefix_bytes(bytes);
    if !rest.is_empty() {
        return Cow::Owned(WString::from_utf8_bytes_inner(prefix, rest));
    }
    // We can directly reinterpret ASCII bytes as LATIN1.
    Cow::Borrowed(WStr::from_units(bytes))
}