diff --git a/wstr/src/buf.rs b/wstr/src/buf.rs index 8687e0f4f..96ce09e92 100644 --- a/wstr/src/buf.rs +++ b/wstr/src/buf.rs @@ -164,14 +164,14 @@ impl WString { unsafe fn steal_buf(&mut self) -> ManuallyDrop, Vec>> { let ptr = self.ptr.as_ptr(); let data = super::ptr::data(ptr); - let len = super::ptr::len(ptr); + let meta = super::ptr::metadata(ptr); let cap = self.capacity; // SAFETY: we reconstruct the Vec deconstructed in `Self::from_buf`. - let buffer = if super::ptr::is_wide(ptr) { - Units::Wide(Vec::from_raw_parts(data as *mut u16, len, cap)) + let buffer = if meta.is_wide() { + Units::Wide(Vec::from_raw_parts(data as *mut u16, meta.len(), cap)) } else { - Units::Bytes(Vec::from_raw_parts(data as *mut u8, len, cap)) + Units::Bytes(Vec::from_raw_parts(data as *mut u8, meta.len(), cap)) }; ManuallyDrop::new(buffer) } @@ -367,6 +367,16 @@ impl Clone for WString { } } +impl ToOwned for WStr { + type Owned = WString; + + fn to_owned(&self) -> Self::Owned { + let mut buf = WString::new(); + buf.push_str(self); + buf + } +} + impl Deref for WString { type Target = WStr; #[inline] diff --git a/wstr/src/common.rs b/wstr/src/common.rs index 3fdb7daa6..c0ada5da9 100644 --- a/wstr/src/common.rs +++ b/wstr/src/common.rs @@ -150,14 +150,14 @@ impl WStr { #[inline] pub fn is_wide(&self) -> bool { // SAFETY: `self` is a valid `WStr`. - unsafe { ptr::is_wide(ptr::ptr_mut(self)) } + unsafe { ptr::metadata(ptr::ptr_mut(self)).is_wide() } } /// Returns the number of code units. #[inline] pub fn len(&self) -> usize { // SAFETY: `self` is a valid `WStr`. - unsafe { ptr::len(ptr::ptr_mut(self)) } + unsafe { ptr::metadata(ptr::ptr_mut(self)).len() } } /// Returns `true` if `self` contains no code units. 
diff --git a/wstr/src/ptr.rs b/wstr/src/ptr.rs index 59ab6b068..84fd1bbed 100644 --- a/wstr/src/ptr.rs +++ b/wstr/src/ptr.rs @@ -1,22 +1,21 @@ -use alloc::borrow::ToOwned; use core::ops::Range; use core::ptr::{slice_from_raw_parts, slice_from_raw_parts_mut}; -use super::{Units, WString}; +use super::Units; #[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))] compile_error!("WStr only supports 32-bits and 64-bits targets"); /// The maximum string length, equals to 2³¹-1. pub const MAX_STRING_LEN: usize = 0x7FFF_FFFF; -const WIDE_MASK: usize = MAX_STRING_LEN + 1; +const WIDE_MASK: u32 = MAX_STRING_LEN as u32 + 1; /// A UCS2 string slice, analoguous to `&'a str`. #[repr(transparent)] pub struct WStr { /// The internal `WStr` representation. /// - /// What we actually want here is a custom DST, but they don't exist be we must cheat + /// What we actually want here is a custom DST, but they don't exist so we must cheat /// and abuse the slice metadata field. /// /// The data pointer points to the start of the units buffer, which is either a @@ -28,7 +27,6 @@ pub struct WStr { /// - for `Units::Wide`, it is a one. /// /// Note that on 64-bits targets, this leaves the high 32 bits of the length unused. - /// (TODO: find a nice way to expose them for usage by other types?) /// /// # (Un)soundness /// @@ -47,19 +45,54 @@ pub struct WStr { _repr: [()], } -impl ToOwned for WStr { - type Owned = WString; +/// The metadata of a `WStr` pointer. This is always 4 bytes wide, even on 64-bits targets. +/// +/// The layout of `WStr` depends on the value of `self.is_wide()`: +/// - if `false`, it has the layout of `[u8; self.len()]`; +/// - if `true`, it has the layout of `[u16; self.len()]`. 
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct WStrMetadata(u32); - fn to_owned(&self) -> Self::Owned { - let mut buf = WString::new(); - buf.push_str(self); - buf +impl WStrMetadata { + /// SAFETY: raw must fit in a u32 + #[inline(always)] + const unsafe fn from_usize(raw: usize) -> Self { + if raw > u32::MAX as usize { + if cfg!(debug_assertions) { + panic!("invalid WStr metadata"); + } else { + core::hint::unreachable_unchecked() + } + } + + Self(raw as u32) + } + + /// Assemble `WStr` metadata from its components. + /// + /// # Safety + /// `len` must be less than or equal to `MAX_STRING_LEN`. + #[inline(always)] + pub const unsafe fn new(len: usize, is_wide: bool) -> Self { + Self::from_usize(len | if is_wide { WIDE_MASK as usize } else { 0 }) + } + + /// Returns whether this metadata describes a wide `WStr`. + #[inline(always)] + pub const fn is_wide(self) -> bool { + (self.0 & WIDE_MASK) != 0 + } + + /// Returns the length of the described `WStr`. This is never greater than `MAX_STRING_LEN`. + #[inline(always)] + pub const fn len(self) -> usize { + (self.0 & (WIDE_MASK - 1)) as usize } } /// Convenience method to turn a `&T` into a `*mut T`. #[inline] -pub fn ptr_mut(t: &T) -> *mut T { +pub(crate) fn ptr_mut(t: &T) -> *mut T { t as *const T as *mut T } @@ -83,37 +116,21 @@ pub fn data(ptr: *mut WStr) -> *mut () { ptr.cast::<()>() } -/// Returns the length of the raw `WStr` slice. -/// -/// This is always less than or equals to `MAX_STRING_LEN`. +/// Returns the metadata part of a raw `WStr` pointer. /// /// # Safety /// - `ptr` must point to some allocated storage of arbitrary size. +/// - the pointer metadata must be valid. #[inline] -pub unsafe fn len(ptr: *mut WStr) -> usize { - raw_len(ptr as *mut [()]) & MAX_STRING_LEN -} - -/// Returns `true` if the raw `WStr` slice is wide. -/// -/// # Safety -/// - `ptr` must point to some allocated storage of arbitrary size. 
-#[inline] -pub unsafe fn is_wide(ptr: *mut WStr) -> bool { - raw_len(ptr as *mut [()]) & WIDE_MASK != 0 +pub unsafe fn metadata(ptr: *mut WStr) -> WStrMetadata { + let raw = raw_len(ptr as *mut [()]); + WStrMetadata::from_usize(raw) } /// Creates a `WStr` pointer from its raw parts. -/// -/// # Safety -/// - `len` must be less than or equals to `MAX_STRING_LEN` -/// - `data` must point to allocated storage fitting the layout of: -/// - `[u8; len]` if `is_wide` is `false`; -/// - `[u16; len]` if `is_wide` is `true`. #[inline] -pub unsafe fn from_raw_parts(data: *mut (), len: usize, is_wide: bool) -> *mut WStr { - let raw_len = len | if is_wide { WIDE_MASK } else { 0 }; - let slice = slice_from_raw_parts(data, raw_len); +pub fn from_raw_parts(data: *mut (), metadata: WStrMetadata) -> *mut WStr { + let slice = slice_from_raw_parts(data, metadata.0 as usize); slice as *mut WStr } @@ -121,54 +138,49 @@ pub unsafe fn from_raw_parts(data: *mut (), len: usize, is_wide: bool) -> *mut W /// /// # Safety /// - the buffer length must be less than or equals to `MAX_STRING_LEN` -/// - the buffer must point to allocated storage fitting the layout of: -/// - `[u8; len]` if it is a `Units::Bytes`; -/// - `[u16; len]` if it is a `Units::Wide`. +/// - the buffer must point to allocated storage of arbitrary size. #[inline] pub unsafe fn from_units(units: Units<*mut [u8], *mut [u16]>) -> *mut WStr { let (data, len, is_wide) = match units { Units::Bytes(us) => (us as *mut (), raw_len(us), false), Units::Wide(us) => (us as *mut (), raw_len(us), true), }; - from_raw_parts(data, len, is_wide) + + from_raw_parts(data, WStrMetadata::new(len, is_wide)) } -/// Gets a reference to the buffer pointed by `ptr`. +/// Gets a pointer to the buffer designated by `ptr`. /// /// # Safety /// - `ptr` must point to some allocated storage of arbitrary size. 
#[inline] pub unsafe fn units(ptr: *mut WStr) -> Units<*mut [u8], *mut [u16]> { - let (data, len) = (data(ptr), len(ptr)); - if is_wide(ptr) { - Units::Wide(slice_from_raw_parts_mut(data as *mut u16, len)) + let (data, meta) = (data(ptr), metadata(ptr)); + if meta.is_wide() { + Units::Wide(slice_from_raw_parts_mut(data as *mut u16, meta.len())) } else { - Units::Bytes(slice_from_raw_parts_mut(data as *mut u8, len)) + Units::Bytes(slice_from_raw_parts_mut(data as *mut u8, meta.len())) } } -/// Gets a pointer to the `n`th unit of this `WStr. +/// Gets a pointer to the `n`th unit of this `WStr`. /// /// # Safety /// - `ptr` must point to a valid `WStr`; -/// - `i` must be less than or equals to `len(ptr)`. +/// - `i` must be less than or equals to `metadata(ptr).len()`. #[inline] pub unsafe fn offset(ptr: *mut WStr, i: usize) -> Units<*mut u8, *mut u16> { - // SAFETY: we have `index <= len(ptr) <= MAX_STRING_LEN < i32::MAX`, so: - // - `i` can be casted to `isize` on 32-bit and 64-bit targets; - // - the offset call is in bounds. - let n = i as isize; - if is_wide(ptr) { - Units::Wide((ptr as *mut u16).offset(n)) + if metadata(ptr).is_wide() { + Units::Wide((ptr as *mut u16).add(i)) } else { - Units::Bytes((ptr as *mut u8).offset(n)) + Units::Bytes((ptr as *mut u8).add(i)) } } /// Dereferences the `n`th unit of this `WStr`. /// /// # Safety /// - `ptr` must point to a valid `WStr` for reading; -/// - `i` must be less than `len(ptr)`. +/// - `i` must be less than `metadata(ptr).len()`. pub unsafe fn read_at(ptr: *mut WStr, i: usize) -> u16 { match offset(ptr, i) { Units::Bytes(p) => (*p).into(), @@ -181,7 +193,7 @@ pub unsafe fn read_at(ptr: *mut WStr, i: usize) -> u16 { /// # Safety /// - `ptr` must point to a valid `WStr`; /// - `range.start` must be less than or equals to `range.end`; -/// - `range.end` must be less than or equals to `len(ptr)`. +/// - `range.end` must be less than or equals to `metadata(ptr).len()`. 
#[inline] pub unsafe fn slice(ptr: *mut WStr, range: Range) -> *mut WStr { let len = range.end - range.start; @@ -189,5 +201,5 @@ pub unsafe fn slice(ptr: *mut WStr, range: Range) -> *mut WStr { Units::Bytes(p) => (p as *mut (), false), Units::Wide(p) => (p as *mut (), true), }; - from_raw_parts(data, len, is_wide) + from_raw_parts(data, WStrMetadata::new(len, is_wide)) }