wstr: improve `Cow<WStr>` support
This adds the missing `From` implementations, plus UTF-8 conversion functions that return `Cow<WStr>`.
This commit is contained in:
parent
ab0f9b9fc2
commit
83f7bfc0c2
107
wstr/src/buf.rs
107
wstr/src/buf.rs
|
@ -1,4 +1,4 @@
|
||||||
use alloc::borrow::ToOwned;
|
use alloc::borrow::{Cow, ToOwned};
|
||||||
use alloc::string::String;
|
use alloc::string::String;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
use core::fmt;
|
use core::fmt;
|
||||||
|
@ -108,7 +108,24 @@ impl WString {
|
||||||
unsafe { Self::from_buf_unchecked(buf) }
|
unsafe { Self::from_buf_unchecked(buf) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a `WString` from an pre-existing `WStr`.
|
||||||
|
#[inline]
|
||||||
|
pub fn from_wstr(s: &WStr) -> Self {
|
||||||
|
Self::from_buf(match s.units() {
|
||||||
|
Units::Bytes(us) => Units::Bytes(us.to_owned()),
|
||||||
|
Units::Wide(us) => Units::Wide(us.to_owned()),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a `WString` from an UTF-8 `str`.
|
||||||
|
#[inline]
|
||||||
|
pub fn from_utf8(s: &str) -> Self {
|
||||||
|
let (ascii, tail) = split_ascii_prefix(s);
|
||||||
|
Self::from_utf8_inner(ascii, tail)
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a `WString` from a UTF-8 `String`, reusing the allocation if possible.
|
/// Creates a `WString` from a UTF-8 `String`, reusing the allocation if possible.
|
||||||
|
#[inline]
|
||||||
pub fn from_utf8_owned(s: String) -> Self {
|
pub fn from_utf8_owned(s: String) -> Self {
|
||||||
let (ascii, tail) = split_ascii_prefix(&s);
|
let (ascii, tail) = split_ascii_prefix(&s);
|
||||||
if tail.is_empty() {
|
if tail.is_empty() {
|
||||||
|
@ -116,6 +133,23 @@ impl WString {
|
||||||
return Self::from_buf(s.into_bytes());
|
return Self::from_buf(s.into_bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Self::from_utf8_inner(ascii, tail)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a `WString` from UTF-8 bytes, treating invalid sequences
|
||||||
|
/// as described in `DecodeAvmUtf8`, and reusing the allocation if possible.
|
||||||
|
#[inline]
|
||||||
|
pub fn from_utf8_bytes(b: Vec<u8>) -> Self {
|
||||||
|
let (ascii, tail) = split_ascii_prefix_bytes(&b);
|
||||||
|
if tail.is_empty() {
|
||||||
|
// We can directly reinterpret ASCII bytes as LATIN1.
|
||||||
|
return Self::from_buf(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::from_utf8_bytes_inner(ascii, tail)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn from_utf8_inner(ascii: &[u8], tail: &str) -> Self {
|
||||||
let is_wide = tail.find(|ch| ch > u8::MAX.into()).is_some();
|
let is_wide = tail.find(|ch| ch > u8::MAX.into()).is_some();
|
||||||
if is_wide {
|
if is_wide {
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
|
@ -130,22 +164,8 @@ impl WString {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a `WString` from an UTF-8 `str`.
|
pub(crate) fn from_utf8_bytes_inner(ascii: &str, tail: &[u8]) -> Self {
|
||||||
#[inline]
|
|
||||||
pub fn from_utf8(s: &str) -> Self {
|
|
||||||
let mut buf = Self::new();
|
|
||||||
buf.push_utf8(s);
|
|
||||||
buf
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_utf8_bytes(b: Vec<u8>) -> Self {
|
|
||||||
let (ascii, tail) = split_ascii_prefix_bytes(&b);
|
|
||||||
let ascii = ascii.as_bytes();
|
let ascii = ascii.as_bytes();
|
||||||
if tail.is_empty() {
|
|
||||||
// We can directly reinterpret ASCII bytes as LATIN1.
|
|
||||||
return Self::from_buf(b);
|
|
||||||
}
|
|
||||||
|
|
||||||
let is_wide = DecodeAvmUtf8::new(tail).any(|ch| ch > u8::MAX.into());
|
let is_wide = DecodeAvmUtf8::new(tail).any(|ch| ch > u8::MAX.into());
|
||||||
if is_wide {
|
if is_wide {
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
|
@ -170,7 +190,7 @@ impl WString {
|
||||||
buf
|
buf
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a `StrBuf` from a single unicode character.
|
/// Creates a `WString` from a single unicode character.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn from_char(c: char) -> Self {
|
pub fn from_char(c: char) -> Self {
|
||||||
let mut buf = Self::new();
|
let mut buf = Self::new();
|
||||||
|
@ -342,7 +362,6 @@ impl WString {
|
||||||
/// This will convert this `WString` into its wide form if necessary.
|
/// This will convert this `WString` into its wide form if necessary.
|
||||||
pub fn push_utf8(&mut self, s: &str) {
|
pub fn push_utf8(&mut self, s: &str) {
|
||||||
let (ascii, tail) = split_ascii_prefix(s);
|
let (ascii, tail) = split_ascii_prefix(s);
|
||||||
|
|
||||||
let is_wide = || tail.find(|ch| ch > u8::MAX.into()).is_some();
|
let is_wide = || tail.find(|ch| ch > u8::MAX.into()).is_some();
|
||||||
|
|
||||||
self.with_wide_buf_if(is_wide, |units| match units {
|
self.with_wide_buf_if(is_wide, |units| match units {
|
||||||
|
@ -357,6 +376,29 @@ impl WString {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Appends UTF-8 bytes to `self`, treating invalid sequences
|
||||||
|
/// as described in `DecodeAvmUtf8`.
|
||||||
|
///
|
||||||
|
/// This will convert this `WString` into its wide form if necessary.
|
||||||
|
pub fn push_utf8_bytes(&mut self, utf8: &[u8]) {
|
||||||
|
let (ascii, tail) = split_ascii_prefix_bytes(utf8);
|
||||||
|
let ascii = ascii.as_bytes();
|
||||||
|
let is_wide = || DecodeAvmUtf8::new(tail).any(|ch| ch > u8::MAX.into());
|
||||||
|
|
||||||
|
self.with_wide_buf_if(is_wide, |units| match units {
|
||||||
|
Units::Bytes(buf) => {
|
||||||
|
buf.extend_from_slice(ascii);
|
||||||
|
buf.extend(DecodeAvmUtf8::new(tail).map(|ch| ch as u8));
|
||||||
|
}
|
||||||
|
Units::Wide(buf) => {
|
||||||
|
buf.extend(ascii.iter().map(|c| u16::from(*c)));
|
||||||
|
for ch in DecodeAvmUtf8::new(tail) {
|
||||||
|
encode_raw_utf16(ch, buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/// Appends another `WStr` to `self`.
|
/// Appends another `WStr` to `self`.
|
||||||
///
|
///
|
||||||
/// This will convert this `WString` into its wide form if necessary.
|
/// This will convert this `WString` into its wide form if necessary.
|
||||||
|
@ -425,10 +467,9 @@ impl Clone for WString {
|
||||||
impl ToOwned for WStr {
|
impl ToOwned for WStr {
|
||||||
type Owned = WString;
|
type Owned = WString;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn to_owned(&self) -> Self::Owned {
|
fn to_owned(&self) -> Self::Owned {
|
||||||
let mut buf = WString::new();
|
WString::from_wstr(self)
|
||||||
buf.push_str(self);
|
|
||||||
buf
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clone_into(&self, target: &mut Self::Owned) {
|
fn clone_into(&self, target: &mut Self::Owned) {
|
||||||
|
@ -437,6 +478,30 @@ impl ToOwned for WStr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wraps a borrowed `WStr` in a `Cow` without copying.
impl<'a> From<&'a WStr> for Cow<'a, WStr> {
    #[inline]
    fn from(s: &'a WStr) -> Self {
        Self::Borrowed(s)
    }
}
|
||||||
|
|
||||||
|
/// Moves an owned `WString` into a `Cow` as its owned variant.
impl From<WString> for Cow<'_, WStr> {
    #[inline]
    fn from(s: WString) -> Self {
        Self::Owned(s)
    }
}
|
||||||
|
|
||||||
|
/// Extracts a `WString` from a `Cow<WStr>`.
///
/// An owned value is returned as-is; a borrowed one is copied through
/// `ToOwned::to_owned`, which for `WStr` is `WString::from_wstr`.
impl From<Cow<'_, WStr>> for WString {
    #[inline]
    fn from(s: Cow<'_, WStr>) -> Self {
        s.into_owned()
    }
}
|
||||||
|
|
||||||
impl Deref for WString {
|
impl Deref for WString {
|
||||||
type Target = WStr;
|
type Target = WStr;
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|
|
@ -32,6 +32,7 @@ pub use parse::{FromWStr, Integer};
|
||||||
pub use pattern::Pattern;
|
pub use pattern::Pattern;
|
||||||
pub use ptr::{WStr, MAX_STRING_LEN};
|
pub use ptr::{WStr, MAX_STRING_LEN};
|
||||||
|
|
||||||
|
use alloc::borrow::Cow;
|
||||||
use core::borrow::Borrow;
|
use core::borrow::Borrow;
|
||||||
|
|
||||||
use common::panic_on_invalid_length;
|
use common::panic_on_invalid_length;
|
||||||
|
@ -41,3 +42,28 @@ use common::panic_on_invalid_length;
|
||||||
pub fn join<E: Borrow<WStr>, S: Borrow<WStr>>(elems: &[E], sep: &S) -> WString {
|
pub fn join<E: Borrow<WStr>, S: Borrow<WStr>>(elems: &[E], sep: &S) -> WString {
|
||||||
crate::ops::str_join(elems, sep.borrow())
|
crate::ops::str_join(elems, sep.borrow())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a borrowed UTF-8 string to a `WStr` slice.
|
||||||
|
#[inline]
|
||||||
|
pub fn from_utf8(s: &str) -> Cow<'_, WStr> {
|
||||||
|
let (ascii, tail) = utils::split_ascii_prefix(s);
|
||||||
|
if tail.is_empty() {
|
||||||
|
// We can directly reinterpret ASCII bytes as LATIN1.
|
||||||
|
Cow::Borrowed(WStr::from_units(ascii))
|
||||||
|
} else {
|
||||||
|
Cow::Owned(WString::from_utf8_inner(ascii, tail))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a slice of UTF-8 bytes to a `WStr` slice.
|
||||||
|
///
|
||||||
|
/// Invalid UTF-8 sequences are treated as described in `utils::DecodeAvmUtf8`.
|
||||||
|
pub fn from_utf8_bytes(bytes: &[u8]) -> Cow<'_, WStr> {
|
||||||
|
let (ascii, tail) = utils::split_ascii_prefix_bytes(bytes);
|
||||||
|
if tail.is_empty() {
|
||||||
|
// We can directly reinterpret ASCII bytes as LATIN1.
|
||||||
|
Cow::Borrowed(WStr::from_units(bytes))
|
||||||
|
} else {
|
||||||
|
Cow::Owned(WString::from_utf8_bytes_inner(ascii, tail))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue