ruffle/wstr/src/buf.rs

491 lines
15 KiB
Rust

use alloc::borrow::ToOwned;
use alloc::string::String;
use alloc::vec::Vec;
use core::fmt;
use core::mem::{self, ManuallyDrop};
use core::ops::{Deref, DerefMut};
use core::ptr::NonNull;
use static_assertions::assert_eq_size;
use super::utils::{encode_raw_utf16, split_ascii_prefix, split_ascii_prefix_bytes, DecodeAvmUtf8};
use super::{ptr, Units, WStr, MAX_STRING_LEN};
/// An owned, extensible UCS2 string, analogous to `String`.
///
/// The value is stored as the raw parts of a `Vec<u8>` or `Vec<u16>`
/// (pointer, length, capacity) so that it stays compact; see the size
/// assertions below.
pub struct WString {
/// Type-erased pointer to the first code unit of the owned buffer, as
/// obtained from the backing `Vec`'s `as_mut_ptr()`.
data: NonNull<()>,
/// Metadata built from the buffer length and its wideness flag.
meta: ptr::WStrMetadata,
/// Capacity of the backing `Vec`, in code units, narrowed to `u32`.
capacity: u32,
}
// Layout guard: the struct must stay at 12 bytes on 32-bit targets...
#[cfg(target_pointer_width = "32")]
assert_eq_size!(WString, [u8; 12]);
// ...and at 16 bytes on 64-bit targets.
#[cfg(target_pointer_width = "64")]
assert_eq_size!(WString, [u8; 16]);
impl WString {
/// Builds a `WString` that holds no code units.
///
/// The string starts out in its narrow (1-byte code unit) form.
#[inline]
pub fn new() -> Self {
    let empty: Vec<u8> = Vec::new();
    Self::from_buf(Units::Bytes(empty))
}
/// Creates a new empty `WString` with the given capacity and wideness.
#[inline]
pub fn with_capacity(capacity: usize, wide: bool) -> Self {
if capacity > MAX_STRING_LEN {
super::panic_on_invalid_length(capacity);
}
// SAFETY: the buffer is created empty, and we checked the capacity above.
unsafe {
Self::from_buf_unchecked(if wide {
Units::Wide(Vec::with_capacity(capacity))
} else {
Units::Bytes(Vec::with_capacity(capacity))
})
}
}
/// Wraps an owned buffer of 1-byte or 2-byte code units in a `WString`,
/// skipping all length and capacity validation.
///
/// # Safety
///
/// Neither the length nor the capacity of `buf` may exceed `MAX_STRING_LEN`.
#[inline]
pub unsafe fn from_buf_unchecked(buf: Units<Vec<u8>, Vec<u16>>) -> Self {
    // The allocation is now owned by the returned value, so the `Vec`'s own
    // destructor must never run (it would free the buffer a second time).
    let mut raw = ManuallyDrop::new(buf);
    let (data, len, cap, is_wide) = match &mut *raw {
        Units::Bytes(bytes) => (
            bytes.as_mut_ptr().cast::<()>(),
            bytes.len(),
            bytes.capacity(),
            false,
        ),
        Units::Wide(wide) => (
            wide.as_mut_ptr().cast::<()>(),
            wide.len(),
            wide.capacity(),
            true,
        ),
    };

    Self {
        data: NonNull::new_unchecked(data),
        meta: ptr::WStrMetadata::new(len, is_wide),
        // The caller guarantees `cap <= MAX_STRING_LEN`, which is what the
        // narrowing cast relies on.
        capacity: cap as u32,
    }
}
/// Creates a `WString` from an owned buffer containing 1 or 2-bytes code units.
///
/// If the buffer's capacity exceeds `MAX_STRING_LEN`, the buffer is shrunk
/// first; if its length also exceeds `MAX_STRING_LEN`, the length is handed
/// to `panic_on_invalid_length`.
#[inline]
pub fn from_buf(buf: impl Into<Units<Vec<u8>, Vec<u16>>>) -> Self {
// Tries to shrink the capacity below the maximum allowed WStr length.
// Kept out of line (`#[cold]`) because oversized buffers are the rare case.
#[cold]
fn shrink<T>(buf: &mut Vec<T>) {
assert!(buf.capacity() > MAX_STRING_LEN);
let len = buf.len();
// A length that is itself too large cannot be fixed by shrinking.
if len > MAX_STRING_LEN {
super::panic_on_invalid_length(len);
}
buf.shrink_to(MAX_STRING_LEN);
// Detach the allocation: `mem::take` leaves an empty `Vec` behind in `buf`,
// and `ManuallyDrop` keeps the detached `Vec` from freeing its memory.
let ptr = ManuallyDrop::new(mem::take(buf)).as_mut_ptr();
// SAFETY:
// Per its contract, `Vec::shrink_to` reallocated the buffer to have
// a capacity between `MAX_STRING_LEN` and `buf.capacity()`.
// NOTE(review): the rebuilt `Vec` declares a capacity of exactly
// `MAX_STRING_LEN` even if `shrink_to` kept more; confirm that this is
// acceptable for the allocator in use.
unsafe {
*buf = Vec::from_raw_parts(ptr, len, MAX_STRING_LEN);
}
}
// Fast-path check; only calls into `shrink` when the capacity is too large.
#[inline(always)]
fn ensure_valid_cap<T>(buf: &mut Vec<T>) {
if buf.capacity() > MAX_STRING_LEN {
shrink(buf)
}
}
let mut buf = buf.into();
match &mut buf {
Units::Bytes(buf) => ensure_valid_cap(buf),
Units::Wide(buf) => ensure_valid_cap(buf),
}
// SAFETY: the length and the capacity were checked above.
unsafe { Self::from_buf_unchecked(buf) }
}
/// Converts an owned UTF-8 `String` into a `WString`.
///
/// A purely ASCII input hands its byte buffer over unchanged. Otherwise a
/// new buffer is built: narrow when every character fits in one byte, wide
/// when at least one does not.
pub fn from_utf8_owned(s: String) -> Self {
    let (ascii, tail) = split_ascii_prefix(&s);
    if tail.is_empty() {
        // ASCII bytes read the same as LATIN1, so the allocation is reused.
        return Self::from_buf(s.into_bytes());
    }

    let needs_wide = tail.chars().any(|ch| u32::from(ch) > u32::from(u8::MAX));
    if !needs_wide {
        let mut narrow: Vec<u8> = Vec::new();
        narrow.extend_from_slice(ascii);
        narrow.extend(tail.chars().map(|ch| ch as u8));
        return Self::from_buf(narrow);
    }

    let mut wide: Vec<u16> = Vec::new();
    wide.extend(ascii.iter().copied().map(u16::from));
    wide.extend(tail.encode_utf16());
    Self::from_buf(wide)
}
/// Builds a `WString` holding the contents of the UTF-8 string `s`.
///
/// Starts from an empty string and appends `s` with `push_utf8`.
#[inline]
pub fn from_utf8(s: &str) -> Self {
    let mut out = Self::new();
    out.push_utf8(s);
    out
}
/// Converts an owned buffer of UTF-8 bytes into a `WString`.
///
/// A purely ASCII buffer is handed over unchanged. Otherwise the non-ASCII
/// tail is decoded with `DecodeAvmUtf8` and stored narrow when every decoded
/// value fits in one byte, wide when at least one does not.
pub fn from_utf8_bytes(b: Vec<u8>) -> Self {
    let (ascii, tail) = split_ascii_prefix_bytes(&b);
    if tail.is_empty() {
        // ASCII bytes read the same as LATIN1, so the allocation is reused.
        return Self::from_buf(b);
    }
    let ascii = ascii.as_bytes();

    let needs_wide = DecodeAvmUtf8::new(tail).any(|ch| ch > u8::MAX.into());
    if !needs_wide {
        let mut narrow = Vec::new();
        narrow.extend_from_slice(ascii);
        narrow.extend(DecodeAvmUtf8::new(tail).map(|ch| ch as u8));
        return Self::from_buf(narrow);
    }

    let mut wide = Vec::new();
    wide.extend(ascii.iter().copied().map(u16::from));
    DecodeAvmUtf8::new(tail).for_each(|ch| encode_raw_utf16(ch, &mut wide));
    Self::from_buf(wide)
}
/// Builds a `WString` containing exactly the UCS2 code unit `c`.
#[inline]
pub fn from_unit(c: u16) -> Self {
    let mut out = Self::new();
    out.push(c);
    out
}
/// Builds a `WString` containing the single Unicode character `c`.
#[inline]
pub fn from_char(c: char) -> Self {
    let mut out = Self::new();
    out.push_char(c);
    out
}
/// Borrows the contents of this `WString` as a `WStr` slice.
pub fn as_wstr(&self) -> &WStr {
    // SAFETY: `self` is immutably borrowed for as long as the returned
    // reference lives.
    unsafe { &*ptr::from_raw_parts(self.data.as_ptr(), self.meta) }
}
/// Mutably borrows the contents of this `WString` as a `WStr` slice.
pub fn as_wstr_mut(&mut self) -> &mut WStr {
    // SAFETY: `self` is mutably borrowed for as long as the returned
    // reference lives.
    unsafe { &mut *ptr::from_raw_parts(self.data.as_ptr(), self.meta) }
}
/// Rebuilds the backing `Vec` from the stored raw parts without giving up
/// `self`'s own copy of them.
///
/// # Safety
///
/// - any future access to `self` (including drop) will invalidate the returned buffer.
/// - the returned buffer shouldn't be dropped unless self is forgotten.
#[inline]
unsafe fn steal_buf(&mut self) -> ManuallyDrop<Units<Vec<u8>, Vec<u16>>> {
    let len = self.meta.len();
    let cap = self.capacity as usize;

    // SAFETY: this reassembles the `Vec<T>` that `Self::from_buf` took apart,
    // using the element type recorded by the wideness flag.
    let stolen = match self.meta.is_wide() {
        true => Units::Wide(Vec::from_raw_parts(
            self.data.cast::<u16>().as_ptr(),
            len,
            cap,
        )),
        false => Units::Bytes(Vec::from_raw_parts(
            self.data.cast::<u8>().as_ptr(),
            len,
            cap,
        )),
    };
    ManuallyDrop::new(stolen)
}
/// Consumes the `WString` and returns its backing buffer, without copying.
#[inline]
pub fn into_buf(self) -> Units<Vec<u8>, Vec<u16>> {
    let mut detached = ManuallyDrop::new(self);
    // SAFETY: `detached` is never dropped or touched again, so the stolen
    // buffer is the only owner of the allocation.
    let stolen = unsafe { detached.steal_buf() };
    ManuallyDrop::into_inner(stolen)
}
// Modify the raw internal buffer.
//
// Panics if the resulting buffer has a length greater than `MAX_STRING_LEN`.
fn with_buf<F, R>(&mut self, f: F) -> R
where
F: FnOnce(&mut Units<Vec<u8>, Vec<u16>>) -> R,
{
struct Guard<'a> {
source: &'a mut WString,
buffer: ManuallyDrop<Units<Vec<u8>, Vec<u16>>>,
}
impl<'a> Guard<'a> {
fn init(source: &'a mut WString) -> Self {
let buffer = unsafe { source.steal_buf() };
Self { source, buffer }
}
fn commit(mut self) {
// SAFETY: we disable the Drop impl, so we can put the ManuallyDrop'd buffer back
unsafe {
let buffer = ManuallyDrop::take(&mut self.buffer);
core::ptr::write(self.source, WString::from_buf(buffer));
mem::forget(self);
}
}
}
impl<'a> Drop for Guard<'a> {
fn drop(&mut self) {
// SAFETY: something has gone wrong, replace the buffer with an empty one and drop it.
unsafe {
core::ptr::write(self.source, WString::new());
ManuallyDrop::drop(&mut self.buffer);
}
}
}
let mut guard = Guard::init(self);
let result = f(&mut guard.buffer);
guard.commit();
result
}
// Modify the raw internal buffer, widening it first when requested.
//
// `wide` is only consulted while the buffer is still in narrow form; when it
// returns `true`, every byte is converted to a 2-byte code unit before `f`
// runs.
fn with_wide_buf_if<W, F, R>(&mut self, wide: W, f: F) -> R
where
    W: FnOnce() -> bool,
    F: FnOnce(&mut Units<Vec<u8>, Vec<u16>>) -> R,
{
    self.with_buf(|units| {
        let must_widen = matches!(units, Units::Bytes(_)) && wide();
        if must_widen {
            if let Units::Bytes(narrow) = units {
                let widened: Vec<u16> = mem::take(narrow).into_iter().map(u16::from).collect();
                *units = Units::Wide(widened);
            }
        }
        f(units)
    })
}
/// Empties this `WString` by resetting its length to zero.
///
/// The wideness flag is carried over and the stored capacity is left as-is.
pub fn clear(&mut self) {
    // SAFETY: a length of zero is valid for any buffer.
    let emptied = unsafe { ptr::WStrMetadata::new(0, self.meta.is_wide()) };
    self.meta = emptied;
}
/// Appends the UTF-16 code unit `ch` to the end of `self`.
///
/// A narrow string is converted to its wide form first when `ch` does not
/// fit in one byte.
pub fn push(&mut self, ch: u16) {
    let needs_wide = || ch > u16::from(u8::MAX);
    self.with_wide_buf_if(needs_wide, |units| match units {
        Units::Wide(wide) => wide.push(ch),
        // Still narrow here, so `ch` was found to fit in a byte.
        Units::Bytes(narrow) => narrow.push(ch as u8),
    })
}
/// Appends the LATIN1 code unit `ch` to the end of `self`.
///
/// The string keeps its current form; a wide string stores `ch` zero-extended.
pub fn push_byte(&mut self, ch: u8) {
    self.with_buf(|units| match units {
        Units::Wide(wide) => wide.push(u16::from(ch)),
        Units::Bytes(narrow) => narrow.push(ch),
    })
}
/// Appends the Unicode character `ch` to the end of `self`.
///
/// A narrow string is converted to its wide form first when `ch` does not
/// fit in one byte. In wide form the character is stored as its UTF-16
/// encoding.
pub fn push_char(&mut self, ch: char) {
    let code_point = u32::from(ch);
    self.with_wide_buf_if(
        || code_point > u32::from(u8::MAX),
        |units| match units {
            Units::Wide(wide) => {
                let mut scratch = [0u16; 2];
                wide.extend_from_slice(ch.encode_utf16(&mut scratch));
            }
            // Still narrow here, so the code point was found to fit in a byte.
            Units::Bytes(narrow) => narrow.push(code_point as u8),
        },
    )
}
/// Appends the UTF-8 string `s` to the end of `self`.
///
/// A narrow string is converted to its wide form first when `s` contains a
/// character that does not fit in one byte.
pub fn push_utf8(&mut self, s: &str) {
    let (ascii, tail) = split_ascii_prefix(s);
    let needs_wide = || tail.chars().any(|ch| u32::from(ch) > u32::from(u8::MAX));
    self.with_wide_buf_if(needs_wide, |units| {
        let rest = tail.encode_utf16();
        match units {
            Units::Wide(wide) => {
                wide.extend(ascii.iter().copied().map(u16::from));
                wide.extend(rest);
            }
            Units::Bytes(narrow) => {
                narrow.extend_from_slice(ascii);
                // Still narrow here, so every unit was found to fit in a byte.
                narrow.extend(rest.map(|unit| unit as u8));
            }
        }
    });
}
/// Appends the contents of the `WStr` `s` to the end of `self`.
///
/// A narrow string is converted to its wide form first when `s` is wide.
pub fn push_str(&mut self, s: &WStr) {
    let other = s.units();
    let other_is_wide = || matches!(other, Units::Wide(_));
    self.with_wide_buf_if(other_is_wide, |units| match (units, other) {
        (Units::Wide(dst), Units::Wide(src)) => dst.extend_from_slice(src),
        (Units::Wide(dst), Units::Bytes(src)) => {
            dst.extend(src.iter().copied().map(u16::from))
        }
        (Units::Bytes(dst), Units::Bytes(src)) => dst.extend_from_slice(src),
        // A wide `s` forces `self` into wide form before this closure runs.
        (Units::Bytes(_), Units::Wide(_)) => unreachable!(),
    })
}
}
impl Drop for WString {
    /// Frees the backing buffer by rebuilding the `Vec` and dropping it.
    fn drop(&mut self) {
        // SAFETY: `self` is never accessed again after its destructor, so
        // the rebuilt buffer is the sole owner of the allocation.
        let mut owned = unsafe { self.steal_buf() };
        // SAFETY: `owned` is dropped exactly once, here.
        unsafe { ManuallyDrop::drop(&mut owned) };
    }
}
impl Default for WString {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl Clone for WString {
    /// Copies the code units into a freshly allocated buffer of the same
    /// wideness.
    fn clone(&self) -> Self {
        let copy = match self.units() {
            Units::Wide(wide) => Units::Wide(wide.to_owned()),
            Units::Bytes(bytes) => Units::Bytes(bytes.to_owned()),
        };
        // SAFETY: the copy is no longer than `self`, whose length is already
        // known to be valid.
        unsafe { Self::from_buf_unchecked(copy) }
    }

    /// Overwrites `self` with the contents of `other`, writing into the
    /// existing buffer when both strings have the same wideness.
    fn clone_from(&mut self, other: &Self) {
        if self.is_wide() == other.is_wide() {
            self.with_buf(|buf| match (buf, other.units()) {
                (Units::Wide(dst), Units::Wide(src)) => {
                    dst.clear();
                    dst.extend_from_slice(src);
                }
                (Units::Bytes(dst), Units::Bytes(src)) => {
                    dst.clear();
                    dst.extend_from_slice(src);
                }
                // Both sides were just checked to share the same wideness.
                _ => unreachable!(),
            })
        } else {
            *self = other.clone();
        }
    }
}
impl ToOwned for WStr {
    type Owned = WString;

    /// Copies this slice into a new `WString`.
    fn to_owned(&self) -> Self::Owned {
        let mut copy = WString::new();
        copy.push_str(self);
        copy
    }

    /// Replaces the contents of `target` with a copy of this slice.
    fn clone_into(&self, target: &mut Self::Owned) {
        target.clear();
        target.push_str(self);
    }
}
impl Deref for WString {
type Target = WStr;
#[inline]
fn deref(&self) -> &WStr {
self.as_wstr()
}
}
impl DerefMut for WString {
#[inline]
fn deref_mut(&mut self) -> &mut WStr {
self.as_wstr_mut()
}
}
impl<'a> From<&'a WStr> for WString {
    /// Copies the borrowed slice into an owned `WString`.
    #[inline]
    fn from(s: &'a WStr) -> Self {
        <WStr as ToOwned>::to_owned(s)
    }
}
impl FromIterator<u16> for WString {
    /// Collects UTF-16 code units into a `WString`.
    ///
    /// The buffer starts narrow, pre-allocated from the iterator's lower
    /// size bound, and is widened by `push` when a unit requires it.
    fn from_iter<T: IntoIterator<Item = u16>>(iter: T) -> Self {
        let units = iter.into_iter();
        let (lower_bound, _) = units.size_hint();
        let mut out = Self::with_capacity(lower_bound, false);
        for unit in units {
            out.push(unit);
        }
        out
    }
}
impl AsMut<WStr> for WString {
    /// Returns the mutable `WStr` view of this string.
    #[inline]
    fn as_mut(&mut self) -> &mut WStr {
        DerefMut::deref_mut(self)
    }
}
impl fmt::Write for WString {
#[inline]
fn write_str(&mut self, s: &str) -> fmt::Result {
self.push_utf8(s);
Ok(())
}
#[inline]
fn write_char(&mut self, c: char) -> fmt::Result {
self.push_char(c);
Ok(())
}
}
// Expands the crate's `wstr_impl_traits!` macro for `WString`.
// NOTE(review): presumably this generates the trait impls shared by the
// crate's string types; the macro is defined elsewhere, so confirm there.
wstr_impl_traits!(impl for WString);