wstr: introduce explicit WStrMetadata type

This commit is contained in:
Moulins 2022-04-07 20:43:24 +02:00 committed by Mike Welsh
parent e0c72f4c9a
commit 2d60e62b8b
3 changed files with 84 additions and 62 deletions

View File

@ -164,14 +164,14 @@ impl WString {
unsafe fn steal_buf(&mut self) -> ManuallyDrop<Units<Vec<u8>, Vec<u16>>> { unsafe fn steal_buf(&mut self) -> ManuallyDrop<Units<Vec<u8>, Vec<u16>>> {
let ptr = self.ptr.as_ptr(); let ptr = self.ptr.as_ptr();
let data = super::ptr::data(ptr); let data = super::ptr::data(ptr);
let len = super::ptr::len(ptr); let meta = super::ptr::metadata(ptr);
let cap = self.capacity; let cap = self.capacity;
// SAFETY: we reconstruct the Vec<T> deconstructed in `Self::from_buf`. // SAFETY: we reconstruct the Vec<T> deconstructed in `Self::from_buf`.
let buffer = if super::ptr::is_wide(ptr) { let buffer = if meta.is_wide() {
Units::Wide(Vec::from_raw_parts(data as *mut u16, len, cap)) Units::Wide(Vec::from_raw_parts(data as *mut u16, meta.len(), cap))
} else { } else {
Units::Bytes(Vec::from_raw_parts(data as *mut u8, len, cap)) Units::Bytes(Vec::from_raw_parts(data as *mut u8, meta.len(), cap))
}; };
ManuallyDrop::new(buffer) ManuallyDrop::new(buffer)
} }
@ -367,6 +367,16 @@ impl Clone for WString {
} }
} }
impl ToOwned for WStr {
    type Owned = WString;

    /// Builds an owned `WString` holding the contents of this `WStr`.
    ///
    /// Starts from `WString::new()` and appends `self` with `push_str`.
    fn to_owned(&self) -> WString {
        let mut owned = WString::new();
        owned.push_str(self);
        owned
    }
}
impl Deref for WString { impl Deref for WString {
type Target = WStr; type Target = WStr;
#[inline] #[inline]

View File

@ -150,14 +150,14 @@ impl WStr {
#[inline] #[inline]
pub fn is_wide(&self) -> bool { pub fn is_wide(&self) -> bool {
// SAFETY: `self` is a valid `WStr`. // SAFETY: `self` is a valid `WStr`.
unsafe { ptr::is_wide(ptr::ptr_mut(self)) } unsafe { ptr::metadata(ptr::ptr_mut(self)).is_wide() }
} }
/// Returns the number of code units. /// Returns the number of code units.
#[inline] #[inline]
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
// SAFETY: `self` is a valid `WStr`. // SAFETY: `self` is a valid `WStr`.
unsafe { ptr::len(ptr::ptr_mut(self)) } unsafe { ptr::metadata(ptr::ptr_mut(self)).len() }
} }
/// Returns `true` if `self` contains no code units. /// Returns `true` if `self` contains no code units.

View File

@ -1,22 +1,21 @@
use alloc::borrow::ToOwned;
use core::ops::Range; use core::ops::Range;
use core::ptr::{slice_from_raw_parts, slice_from_raw_parts_mut}; use core::ptr::{slice_from_raw_parts, slice_from_raw_parts_mut};
use super::{Units, WString}; use super::Units;
#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))] #[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
compile_error!("WStr only supports 32-bits and 64-bits targets"); compile_error!("WStr only supports 32-bits and 64-bits targets");
/// The maximum string length, equals to 2³¹-1. /// The maximum string length, equals to 2³¹-1.
pub const MAX_STRING_LEN: usize = 0x7FFF_FFFF; pub const MAX_STRING_LEN: usize = 0x7FFF_FFFF;
const WIDE_MASK: usize = MAX_STRING_LEN + 1; const WIDE_MASK: u32 = MAX_STRING_LEN as u32 + 1;
/// A UCS2 string slice, analogous to `&'a str`. /// A UCS2 string slice, analogous to `&'a str`.
#[repr(transparent)] #[repr(transparent)]
pub struct WStr { pub struct WStr {
/// The internal `WStr` representation. /// The internal `WStr` representation.
/// ///
/// What we actually want here is a custom DST, but they don't exist be we must cheat /// What we actually want here is a custom DST, but they don't exist so we must cheat
/// and abuse the slice metadata field. /// and abuse the slice metadata field.
/// ///
/// The data pointer points to the start of the units buffer, which is either a /// The data pointer points to the start of the units buffer, which is either a
@ -28,7 +27,6 @@ pub struct WStr {
/// - for `Units::Wide`, it is a one. /// - for `Units::Wide`, it is a one.
/// ///
/// Note that on 64-bits targets, this leaves the high 32 bits of the length unused. /// Note that on 64-bits targets, this leaves the high 32 bits of the length unused.
/// (TODO: find a nice way to expose them for usage by other types?)
/// ///
/// # (Un)soundness /// # (Un)soundness
/// ///
@ -47,19 +45,54 @@ pub struct WStr {
_repr: [()], _repr: [()],
} }
impl ToOwned for WStr { /// The metadata of a `WStr` pointer. This is always 4 bytes wide, even on 64-bits targets.
type Owned = WString; ///
/// The layout of `WStr` depends on the value of `self.is_wide()`:
/// - if `false`, it has the layout of `[u8; self.len()]`;
/// - if `true`, it has the layout of `[u16; self.len()]`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct WStrMetadata(u32);
fn to_owned(&self) -> Self::Owned { impl WStrMetadata {
let mut buf = WString::new(); /// SAFETY: raw must fit in a u32
buf.push_str(self); #[inline(always)]
buf const unsafe fn from_usize(raw: usize) -> Self {
if raw > u32::MAX as usize {
if cfg!(debug_assertions) {
panic!("invalid WStr metadata");
} else {
core::hint::unreachable_unchecked()
}
}
Self(raw as u32)
}
/// Builds `WStr` metadata out of a length and a wideness flag.
///
/// The length is combined (bitwise OR) with `WIDE_MASK` when `is_wide` is
/// `true`, and with zero otherwise; the result is handed to `from_usize`.
///
/// # Safety
/// `len` must be less than or equal to `MAX_STRING_LEN`.
#[inline(always)]
pub const unsafe fn new(len: usize, is_wide: bool) -> Self {
    let wide_flag = if is_wide { WIDE_MASK as usize } else { 0 };
    Self::from_usize(len | wide_flag)
}
/// Tells whether the `WStr` described by this metadata is wide.
///
/// This is `true` exactly when the `WIDE_MASK` bit is set in the stored value.
#[inline(always)]
pub const fn is_wide(self) -> bool {
    let wide_bit = self.0 & WIDE_MASK;
    wide_bit != 0
}
/// Returns the length of the described `WStr`. This is never greater than `MAX_STRING_LEN`.
#[inline(always)]
pub const fn len(self) -> usize {
(self.0 & (WIDE_MASK - 1)) as usize
} }
} }
/// Convenience method to turn a `&T` into a `*mut T`. /// Convenience method to turn a `&T` into a `*mut T`.
#[inline] #[inline]
pub fn ptr_mut<T: ?Sized>(t: &T) -> *mut T { pub(crate) fn ptr_mut<T: ?Sized>(t: &T) -> *mut T {
t as *const T as *mut T t as *const T as *mut T
} }
@ -83,37 +116,21 @@ pub fn data(ptr: *mut WStr) -> *mut () {
ptr.cast::<()>() ptr.cast::<()>()
} }
/// Returns the length of the raw `WStr` slice. /// Returns the metadata part of a raw `WStr` pointer.
///
/// This is always less than or equals to `MAX_STRING_LEN`.
/// ///
/// # Safety /// # Safety
/// - `ptr` must point to some allocated storage of arbitrary size. /// - `ptr` must point to some allocated storage of arbitrary size.
/// - the pointer metadata must be valid.
#[inline] #[inline]
pub unsafe fn len(ptr: *mut WStr) -> usize { pub unsafe fn metadata(ptr: *mut WStr) -> WStrMetadata {
raw_len(ptr as *mut [()]) & MAX_STRING_LEN let raw = raw_len(ptr as *mut [()]);
} WStrMetadata::from_usize(raw)
/// Returns `true` if the raw `WStr` slice is wide.
///
/// # Safety
/// - `ptr` must point to some allocated storage of arbitrary size.
#[inline]
pub unsafe fn is_wide(ptr: *mut WStr) -> bool {
raw_len(ptr as *mut [()]) & WIDE_MASK != 0
} }
/// Creates a `WStr` pointer from its raw parts. /// Creates a `WStr` pointer from its raw parts.
///
/// # Safety
/// - `len` must be less than or equals to `MAX_STRING_LEN`
/// - `data` must point to allocated storage fitting the layout of:
/// - `[u8; len]` if `is_wide` is `false`;
/// - `[u16; len]` if `is_wide` is `true`.
#[inline] #[inline]
pub unsafe fn from_raw_parts(data: *mut (), len: usize, is_wide: bool) -> *mut WStr { pub fn from_raw_parts(data: *mut (), metadata: WStrMetadata) -> *mut WStr {
let raw_len = len | if is_wide { WIDE_MASK } else { 0 }; let slice = slice_from_raw_parts(data, metadata.0 as usize);
let slice = slice_from_raw_parts(data, raw_len);
slice as *mut WStr slice as *mut WStr
} }
@ -121,54 +138,49 @@ pub unsafe fn from_raw_parts(data: *mut (), len: usize, is_wide: bool) -> *mut W
/// ///
/// # Safety /// # Safety
/// - the buffer length must be less than or equals to `MAX_STRING_LEN` /// - the buffer length must be less than or equals to `MAX_STRING_LEN`
/// - the buffer must point to allocated storage fitting the layout of: /// - the buffer must point to allocated storage of arbitrary size.
/// - `[u8; len]` if it is a `Units::Bytes`;
/// - `[u16; len]` if it is a `Units::Wide`.
#[inline] #[inline]
pub unsafe fn from_units(units: Units<*mut [u8], *mut [u16]>) -> *mut WStr { pub unsafe fn from_units(units: Units<*mut [u8], *mut [u16]>) -> *mut WStr {
let (data, len, is_wide) = match units { let (data, len, is_wide) = match units {
Units::Bytes(us) => (us as *mut (), raw_len(us), false), Units::Bytes(us) => (us as *mut (), raw_len(us), false),
Units::Wide(us) => (us as *mut (), raw_len(us), true), Units::Wide(us) => (us as *mut (), raw_len(us), true),
}; };
from_raw_parts(data, len, is_wide)
from_raw_parts(data, WStrMetadata::new(len, is_wide))
} }
/// Gets a reference to the buffer pointed by `ptr`. /// Gets a pointer to the buffer designated by `ptr`.
/// ///
/// # Safety /// # Safety
/// - `ptr` must point to some allocated storage of arbitrary size. /// - `ptr` must point to some allocated storage of arbitrary size.
#[inline] #[inline]
pub unsafe fn units(ptr: *mut WStr) -> Units<*mut [u8], *mut [u16]> { pub unsafe fn units(ptr: *mut WStr) -> Units<*mut [u8], *mut [u16]> {
let (data, len) = (data(ptr), len(ptr)); let (data, meta) = (data(ptr), metadata(ptr));
if is_wide(ptr) { if meta.is_wide() {
Units::Wide(slice_from_raw_parts_mut(data as *mut u16, len)) Units::Wide(slice_from_raw_parts_mut(data as *mut u16, meta.len()))
} else { } else {
Units::Bytes(slice_from_raw_parts_mut(data as *mut u8, len)) Units::Bytes(slice_from_raw_parts_mut(data as *mut u8, meta.len()))
} }
} }
/// Gets a pointer to the `n`th unit of this `WStr. /// Gets a pointer to the `n`th unit of this `WStr`.
/// ///
/// # Safety /// # Safety
/// - `ptr` must point to a valid `WStr`; /// - `ptr` must point to a valid `WStr`;
/// - `i` must be less than or equals to `len(ptr)`. /// - `i` must be less than or equals to `metadata(ptr).len()`.
#[inline] #[inline]
pub unsafe fn offset(ptr: *mut WStr, i: usize) -> Units<*mut u8, *mut u16> { pub unsafe fn offset(ptr: *mut WStr, i: usize) -> Units<*mut u8, *mut u16> {
// SAFETY: we have `index <= len(ptr) <= MAX_STRING_LEN < i32::MAX`, so: if metadata(ptr).is_wide() {
// - `i` can be casted to `isize` on 32-bit and 64-bit targets; Units::Wide((ptr as *mut u16).add(i))
// - the offset call is in bounds.
let n = i as isize;
if is_wide(ptr) {
Units::Wide((ptr as *mut u16).offset(n))
} else { } else {
Units::Bytes((ptr as *mut u8).offset(n)) Units::Bytes((ptr as *mut u8).add(i))
} }
} }
/// Dereferences the `n`th unit of this `WStr`. /// Dereferences the `n`th unit of this `WStr`.
/// ///
/// # Safety /// # Safety
/// - `ptr` must point to a valid `WStr` for reading; /// - `ptr` must point to a valid `WStr` for reading;
/// - `i` must be less than `len(ptr)`. /// - `i` must be less than `metadata(ptr).len()`.
pub unsafe fn read_at(ptr: *mut WStr, i: usize) -> u16 { pub unsafe fn read_at(ptr: *mut WStr, i: usize) -> u16 {
match offset(ptr, i) { match offset(ptr, i) {
Units::Bytes(p) => (*p).into(), Units::Bytes(p) => (*p).into(),
@ -181,7 +193,7 @@ pub unsafe fn read_at(ptr: *mut WStr, i: usize) -> u16 {
/// # Safety /// # Safety
/// - `ptr` must point to a valid `WStr`; /// - `ptr` must point to a valid `WStr`;
/// - `range.start` must be less than or equals to `range.end`; /// - `range.start` must be less than or equals to `range.end`;
/// - `range.end` must be less than or equals to `len(ptr)`. /// - `range.end` must be less than or equals to `metadata(ptr).len()`.
#[inline] #[inline]
pub unsafe fn slice(ptr: *mut WStr, range: Range<usize>) -> *mut WStr { pub unsafe fn slice(ptr: *mut WStr, range: Range<usize>) -> *mut WStr {
let len = range.end - range.start; let len = range.end - range.start;
@ -189,5 +201,5 @@ pub unsafe fn slice(ptr: *mut WStr, range: Range<usize>) -> *mut WStr {
Units::Bytes(p) => (p as *mut (), false), Units::Bytes(p) => (p as *mut (), false),
Units::Wide(p) => (p as *mut (), true), Units::Wide(p) => (p as *mut (), true),
}; };
from_raw_parts(data, len, is_wide) from_raw_parts(data, WStrMetadata::new(len, is_wide))
} }