From 3e215610ca605060bb26b5fa0f2aaa8dca5b31f4 Mon Sep 17 00:00:00 2001 From: Moulins Date: Thu, 1 Aug 2024 06:49:19 +0200 Subject: [PATCH] core: managed `AvmStringRepr`s can now point to a `&'static WStr` This allows `&'static WStr`s to be converted into interned `AvmString`s without moving the string data into an heap-allocated buffer. --- core/src/avm1/property_decl.rs | 8 ++- core/src/string/avm_string.rs | 8 +-- core/src/string/interner.rs | 95 ++++++++++++++++++++-------------- core/src/string/repr.rs | 69 ++++++++++++------------ 4 files changed, 104 insertions(+), 76 deletions(-) diff --git a/core/src/avm1/property_decl.rs b/core/src/avm1/property_decl.rs index 90c864787..60ee81684 100644 --- a/core/src/avm1/property_decl.rs +++ b/core/src/avm1/property_decl.rs @@ -1,5 +1,7 @@ //! Declarative macro for defining AVM1 properties. +use std::borrow::Cow; + use crate::avm1::function::{Executable, FunctionObject, NativeFunction}; use crate::avm1::property::Attribute; use crate::avm1::{Object, ScriptObject, TObject, Value}; @@ -67,8 +69,10 @@ impl Declaration { ) -> Value<'gc> { let mc = context.gc_context; - let name = ruffle_wstr::from_utf8(self.name); - let name = context.interner.intern_wstr(mc, name); + let name = match ruffle_wstr::from_utf8(self.name) { + Cow::Borrowed(name) => context.interner.intern_static(mc, name), + Cow::Owned(name) => context.interner.intern_wstr(mc, name), + }; let value = match self.kind { DeclKind::Property { getter, setter } => { diff --git a/core/src/string/avm_string.rs b/core/src/string/avm_string.rs index 4edc1528a..6b3152117 100644 --- a/core/src/string/avm_string.rs +++ b/core/src/string/avm_string.rs @@ -21,19 +21,19 @@ pub struct AvmString<'gc> { impl<'gc> AvmString<'gc> { /// Turns a string to a fully owned (non-dependent) managed string. - pub(super) fn to_fully_owned(self, gc_context: &Mutation<'gc>) -> Gc<'gc, AvmStringRepr<'gc>> { + pub(super) fn to_fully_owned(self, mc: &Mutation<'gc>) -> Gc<'gc, AvmStringRepr<'gc>> { match self.source { Source::Managed(s) => { if s.is_dependent() { let repr = AvmStringRepr::from_raw(WString::from(self.as_wstr()), false); - Gc::new(gc_context, repr) + Gc::new(mc, repr) } else { s } } Source::Static(s) => { - let repr = AvmStringRepr::from_raw(s.into(), false); - Gc::new(gc_context, repr) + let repr = AvmStringRepr::from_raw_static(s, false); + Gc::new(mc, repr) } } } diff --git a/core/src/string/interner.rs b/core/src/string/interner.rs index af4af1fc4..a2be80ae2 100644 --- a/core/src/string/interner.rs +++ b/core/src/string/interner.rs @@ -3,7 +3,7 @@ use std::borrow::{Borrow, Cow}; use std::cell::Cell; use std::hash::{BuildHasher, Hash, Hasher}; use std::marker::PhantomData; -use std::ops::DerefMut; +use std::ops::{Deref, DerefMut}; use gc_arena::{Collect, Gc, GcWeak, Mutation}; use hashbrown::HashSet; @@ -53,73 +53,87 @@ pub struct AvmStringInterner<'gc> { interned: WeakSet<'gc, AvmStringRepr<'gc>>, empty: Gc<'gc, AvmStringRepr<'gc>>, - chars: [Gc<'gc, AvmStringRepr<'gc>>; 128], + chars: [Gc<'gc, AvmStringRepr<'gc>>; INTERNED_CHAR_LEN], } +const INTERNED_CHAR_LEN: usize = 128; +static INTERNED_CHARS: [u8; INTERNED_CHAR_LEN] = { + let mut chs = [0; INTERNED_CHAR_LEN]; + let mut i = 0; + while i < chs.len() { + chs[i] = i as u8; + i += 1; + } + chs +}; + impl<'gc> AvmStringInterner<'gc> { pub fn new(mc: &Mutation<'gc>) -> Self { let mut interned = WeakSet::default(); - let mut intern_from_static = |s: &[u8]| { - let ch = Self::alloc(mc, Cow::Borrowed(WStr::from_units(s))); - interned.insert_fresh_no_hash(mc, ch) + // We can't use `Self::intern_static` because we don't have a Self yet. + let mut intern_from_static = |s: &'static [u8]| { + let wstr = WStr::from_units(s); + let repr = AvmStringRepr::from_raw_static(wstr, true); + interned.insert_fresh_no_hash(mc, Gc::new(mc, repr)) }; - let empty = intern_from_static(b""); - - let mut chars = [empty; 128]; - for (i, elem) in chars.iter_mut().enumerate() { - *elem = intern_from_static(&[i as u8]); - } - Self { + empty: intern_from_static(b""), + chars: std::array::from_fn(|i| { + let c = &INTERNED_CHARS[i]; + intern_from_static(std::slice::from_ref(c)) + }), interned, - empty, - chars, } } - fn alloc(mc: &Mutation<'gc>, s: Cow<'_, WStr>) -> Gc<'gc, AvmStringRepr<'gc>> { - // note: the string is already marked as interned - let repr = AvmStringRepr::from_raw(s.into_owned(), true); - Gc::new(mc, repr) - } - #[must_use] pub fn intern_wstr<'a, S>(&mut self, mc: &Mutation<'gc>, s: S) -> AvmAtom<'gc> where S: Into>, { let s = s.into(); - let atom = match self.interned.entry(mc, s.as_ref()) { - (Some(atom), _) => atom, - (None, h) => self.interned.insert_fresh(mc, h, Self::alloc(mc, s)), - }; - - AvmAtom(atom) + self.intern_inner(mc, s, |s| { + let repr = AvmStringRepr::from_raw(s.into_owned(), true); + Gc::new(mc, repr) + }) } #[must_use] - pub fn get(&self, mc: &Mutation<'gc>, s: &WStr) -> Option> { - self.interned.get(mc, s).map(AvmAtom) + pub fn intern_static(&mut self, mc: &Mutation<'gc>, s: &'static WStr) -> AvmAtom<'gc> { + self.intern_inner(mc, s, |s| { + let repr = AvmStringRepr::from_raw_static(s, true); + Gc::new(mc, repr) + }) } #[must_use] pub fn intern(&mut self, mc: &Mutation<'gc>, s: AvmString<'gc>) -> AvmAtom<'gc> { if let Some(atom) = s.as_interned() { - return atom; - } - - let atom = match self.interned.entry(mc, s.as_wstr()) { - (Some(atom), _) => atom, - (None, h) => { + atom + } else { + self.intern_inner(mc, s, |s| { let repr = s.to_fully_owned(mc); repr.mark_interned(); - self.interned.insert_fresh(mc, h, repr) - } - }; + repr + }) + } + } - AvmAtom(atom) + // The string returned by `f` should be interned, and equivalent to `s`. + fn intern_inner(&mut self, mc: &Mutation<'gc>, s: S, f: F) -> AvmAtom<'gc> + where + S: Deref, + F: FnOnce(S) -> Gc<'gc, AvmStringRepr<'gc>>, + { + match self.interned.entry(mc, s.deref()) { + (Some(atom), _) => AvmAtom(atom), + (None, h) => { + let atom = self.interned.insert_fresh(mc, h, f(s)); + AvmAtom(atom) + } + } } #[must_use] @@ -127,6 +141,11 @@ impl<'gc> AvmStringInterner<'gc> { self.empty.into() } + #[must_use] + pub fn get(&self, mc: &Mutation<'gc>, s: &WStr) -> Option> { + self.interned.get(mc, s).map(AvmAtom) + } + #[must_use] pub fn get_char(&self, mc: &Mutation<'gc>, c: u16) -> AvmString<'gc> { if let Some(s) = self.chars.get(c as usize) { diff --git a/core/src/string/repr.rs b/core/src/string/repr.rs index 71b851fbc..01cc2744d 100644 --- a/core/src/string/repr.rs +++ b/core/src/string/repr.rs @@ -4,7 +4,7 @@ use std::ops::Deref; use gc_arena::{Collect, Gc}; use ruffle_wstr::{panic_on_invalid_length, ptr as wptr, wstr_impl_traits, WStr, WString}; -/// Internal representation of `AvmAtom`s and (owned) `AvmString`. +/// Internal representation of `AvmAtom`s and (owned) `AvmString`s. /// /// Using this type directly is dangerous, as it can be used to violate /// the interning invariants. @@ -19,17 +19,17 @@ pub struct AvmStringRepr<'gc> { meta: wptr::WStrMetadata, // We abuse WStrMetadata to store capacity and is_interned bit. - // If a string is Dependent, the capacity should always be 0. + // If a string is Static or Dependent, the capacity should always be 0. capacity: Cell, - // If a string is Dependent, this should always be 0. + // If a string is Static or Dependent, this should always be 0. // If a string is Owned, this indicates used chars, including dependents. // Example: assume a string a="abc" has 10 bytes of capacity (chars_used=3). // Then, with a+"d", we produce a dependent string and owner's chars_used becomes 4. // len <= chars_used <= capacity. chars_used: Cell, - // If Some, the string is dependent. The owner is assumed to be non-dynamic. + // If Some, the string is Dependent. The owner is assumed to be non-dynamic. owner: Option>, } @@ -46,33 +46,31 @@ impl<'gc> AvmStringRepr<'gc> { } } - pub fn new_dependent(s: Gc<'gc, Self>, start: usize, end: usize) -> Self { - let wstr = &s[start..end]; - let wstr_ptr = wstr as *const WStr; + pub fn from_raw_static(s: &'static WStr, interned: bool) -> Self { + // SAFETY: 'wstr' is a static WStr and doesn't require an owner to stay valid + unsafe { Self::new_dependent_raw(None, s, interned) } + } - Self { - owner: Some(s.owner().unwrap_or(s)), - ptr: wstr_ptr as *mut WStr as *mut (), - meta: unsafe { wptr::WStrMetadata::of(wstr_ptr) }, - chars_used: Cell::new(0), - // Dependent strings are never interned - capacity: Cell::new(wptr::WStrMetadata::new32(0, false)), - } + pub fn new_dependent(s: Gc<'gc, Self>, start: usize, end: usize) -> Self { + let wstr = &s.as_ref()[start..end]; + let owner = Some(s.owner().unwrap_or(s)); + // Dependent strings are never interned + let interned = false; + // SAFETY: 'wstr' is a WStr pointing into 'owner' + unsafe { Self::new_dependent_raw(owner, wstr, interned) } } unsafe fn new_dependent_raw( - owner: Gc<'gc, Self>, - ptr: *const u8, - length: u32, - is_wide: bool, + owner: Option>, + wstr: &'gc WStr, + interned: bool, ) -> Self { Self { - owner: Some(owner), - ptr: ptr as *mut (), - meta: wptr::WStrMetadata::new32(length, is_wide), + owner, + ptr: wstr as *const WStr as *mut (), + meta: wptr::WStrMetadata::of(wstr), chars_used: Cell::new(0), - // Dependent strings are never interned - capacity: Cell::new(wptr::WStrMetadata::new32(0, false)), + capacity: Cell::new(wptr::WStrMetadata::new32(0, interned)), } } @@ -139,8 +137,12 @@ impl<'gc> AvmStringRepr<'gc> { panic_on_invalid_length(new_len); } - let ret = - Self::new_dependent_raw(left_origin, left_ptr, new_len as u32, left.is_wide()); + let new_wstr = wptr::from_raw_parts( + left_ptr as *const (), + wptr::WStrMetadata::new(new_len, left.is_wide()), + ); + // Dependent strings are never interned. + let ret = Self::new_dependent_raw(Some(left_origin), &*new_wstr, false); return Some(ret); } } @@ -168,7 +170,7 @@ impl<'gc> AvmStringRepr<'gc> { self.capacity.get().is_wide() } - pub fn mark_interned(&self) { + pub(crate) fn mark_interned(&self) { if self.is_dependent() { panic!("bug: we interned a dependent string"); } @@ -180,12 +182,15 @@ impl<'gc> AvmStringRepr<'gc> { impl<'gc> Drop for AvmStringRepr<'gc> { fn drop(&mut self) { - if self.owner.is_none() { + let cap = self.capacity.get().len32(); + if cap > 0 { // SAFETY: we drop the `WString` we logically own. - unsafe { - let cap = self.capacity.get().len32(); - let _ = WString::from_raw_parts(self.ptr, self.meta, cap); - } + debug_assert!(self.owner.is_none()); + let _ = unsafe { WString::from_raw_parts(self.ptr, self.meta, cap) }; + } else { + // Nothing to do, this is a Static or a Dependant string. + // It could also have been an empty owned WString, but + // these don't need to be dropped either. } } }