core: managed `AvmStringRepr`s can now point to a `&'static WStr`

This allows `&'static WStr`s to be converted into interned `AvmString`s
without moving the string data into an heap-allocated buffer.
This commit is contained in:
Moulins 2024-08-01 06:49:19 +02:00 committed by Tom Schuster
parent 63b373d739
commit 3e215610ca
4 changed files with 104 additions and 76 deletions

View File

@ -1,5 +1,7 @@
//! Declarative macro for defining AVM1 properties.
use std::borrow::Cow;
use crate::avm1::function::{Executable, FunctionObject, NativeFunction};
use crate::avm1::property::Attribute;
use crate::avm1::{Object, ScriptObject, TObject, Value};
@ -67,8 +69,10 @@ impl Declaration {
) -> Value<'gc> {
let mc = context.gc_context;
let name = ruffle_wstr::from_utf8(self.name);
let name = context.interner.intern_wstr(mc, name);
let name = match ruffle_wstr::from_utf8(self.name) {
Cow::Borrowed(name) => context.interner.intern_static(mc, name),
Cow::Owned(name) => context.interner.intern_wstr(mc, name),
};
let value = match self.kind {
DeclKind::Property { getter, setter } => {

View File

@ -21,19 +21,19 @@ pub struct AvmString<'gc> {
impl<'gc> AvmString<'gc> {
/// Turns a string to a fully owned (non-dependent) managed string.
pub(super) fn to_fully_owned(self, gc_context: &Mutation<'gc>) -> Gc<'gc, AvmStringRepr<'gc>> {
pub(super) fn to_fully_owned(self, mc: &Mutation<'gc>) -> Gc<'gc, AvmStringRepr<'gc>> {
match self.source {
Source::Managed(s) => {
if s.is_dependent() {
let repr = AvmStringRepr::from_raw(WString::from(self.as_wstr()), false);
Gc::new(gc_context, repr)
Gc::new(mc, repr)
} else {
s
}
}
Source::Static(s) => {
let repr = AvmStringRepr::from_raw(s.into(), false);
Gc::new(gc_context, repr)
let repr = AvmStringRepr::from_raw_static(s, false);
Gc::new(mc, repr)
}
}
}

View File

@ -3,7 +3,7 @@ use std::borrow::{Borrow, Cow};
use std::cell::Cell;
use std::hash::{BuildHasher, Hash, Hasher};
use std::marker::PhantomData;
use std::ops::DerefMut;
use std::ops::{Deref, DerefMut};
use gc_arena::{Collect, Gc, GcWeak, Mutation};
use hashbrown::HashSet;
@ -53,73 +53,87 @@ pub struct AvmStringInterner<'gc> {
interned: WeakSet<'gc, AvmStringRepr<'gc>>,
empty: Gc<'gc, AvmStringRepr<'gc>>,
chars: [Gc<'gc, AvmStringRepr<'gc>>; 128],
chars: [Gc<'gc, AvmStringRepr<'gc>>; INTERNED_CHAR_LEN],
}
const INTERNED_CHAR_LEN: usize = 128;
static INTERNED_CHARS: [u8; INTERNED_CHAR_LEN] = {
let mut chs = [0; INTERNED_CHAR_LEN];
let mut i = 0;
while i < chs.len() {
chs[i] = i as u8;
i += 1;
}
chs
};
impl<'gc> AvmStringInterner<'gc> {
pub fn new(mc: &Mutation<'gc>) -> Self {
let mut interned = WeakSet::default();
let mut intern_from_static = |s: &[u8]| {
let ch = Self::alloc(mc, Cow::Borrowed(WStr::from_units(s)));
interned.insert_fresh_no_hash(mc, ch)
// We can't use `Self::intern_static` because we don't have a Self yet.
let mut intern_from_static = |s: &'static [u8]| {
let wstr = WStr::from_units(s);
let repr = AvmStringRepr::from_raw_static(wstr, true);
interned.insert_fresh_no_hash(mc, Gc::new(mc, repr))
};
let empty = intern_from_static(b"");
let mut chars = [empty; 128];
for (i, elem) in chars.iter_mut().enumerate() {
*elem = intern_from_static(&[i as u8]);
}
Self {
empty: intern_from_static(b""),
chars: std::array::from_fn(|i| {
let c = &INTERNED_CHARS[i];
intern_from_static(std::slice::from_ref(c))
}),
interned,
empty,
chars,
}
}
fn alloc(mc: &Mutation<'gc>, s: Cow<'_, WStr>) -> Gc<'gc, AvmStringRepr<'gc>> {
// note: the string is already marked as interned
let repr = AvmStringRepr::from_raw(s.into_owned(), true);
Gc::new(mc, repr)
}
#[must_use]
pub fn intern_wstr<'a, S>(&mut self, mc: &Mutation<'gc>, s: S) -> AvmAtom<'gc>
where
S: Into<Cow<'a, WStr>>,
{
let s = s.into();
let atom = match self.interned.entry(mc, s.as_ref()) {
(Some(atom), _) => atom,
(None, h) => self.interned.insert_fresh(mc, h, Self::alloc(mc, s)),
};
AvmAtom(atom)
self.intern_inner(mc, s, |s| {
let repr = AvmStringRepr::from_raw(s.into_owned(), true);
Gc::new(mc, repr)
})
}
#[must_use]
pub fn get(&self, mc: &Mutation<'gc>, s: &WStr) -> Option<AvmAtom<'gc>> {
self.interned.get(mc, s).map(AvmAtom)
pub fn intern_static(&mut self, mc: &Mutation<'gc>, s: &'static WStr) -> AvmAtom<'gc> {
self.intern_inner(mc, s, |s| {
let repr = AvmStringRepr::from_raw_static(s, true);
Gc::new(mc, repr)
})
}
#[must_use]
pub fn intern(&mut self, mc: &Mutation<'gc>, s: AvmString<'gc>) -> AvmAtom<'gc> {
if let Some(atom) = s.as_interned() {
return atom;
}
let atom = match self.interned.entry(mc, s.as_wstr()) {
(Some(atom), _) => atom,
(None, h) => {
atom
} else {
self.intern_inner(mc, s, |s| {
let repr = s.to_fully_owned(mc);
repr.mark_interned();
self.interned.insert_fresh(mc, h, repr)
}
};
repr
})
}
}
AvmAtom(atom)
// The string returned by `f` should be interned, and equivalent to `s`.
fn intern_inner<S, F>(&mut self, mc: &Mutation<'gc>, s: S, f: F) -> AvmAtom<'gc>
where
S: Deref<Target = WStr>,
F: FnOnce(S) -> Gc<'gc, AvmStringRepr<'gc>>,
{
match self.interned.entry(mc, s.deref()) {
(Some(atom), _) => AvmAtom(atom),
(None, h) => {
let atom = self.interned.insert_fresh(mc, h, f(s));
AvmAtom(atom)
}
}
}
#[must_use]
@ -127,6 +141,11 @@ impl<'gc> AvmStringInterner<'gc> {
self.empty.into()
}
#[must_use]
pub fn get(&self, mc: &Mutation<'gc>, s: &WStr) -> Option<AvmAtom<'gc>> {
self.interned.get(mc, s).map(AvmAtom)
}
#[must_use]
pub fn get_char(&self, mc: &Mutation<'gc>, c: u16) -> AvmString<'gc> {
if let Some(s) = self.chars.get(c as usize) {

View File

@ -4,7 +4,7 @@ use std::ops::Deref;
use gc_arena::{Collect, Gc};
use ruffle_wstr::{panic_on_invalid_length, ptr as wptr, wstr_impl_traits, WStr, WString};
/// Internal representation of `AvmAtom`s and (owned) `AvmString`.
/// Internal representation of `AvmAtom`s and (owned) `AvmString`s.
///
/// Using this type directly is dangerous, as it can be used to violate
/// the interning invariants.
@ -19,17 +19,17 @@ pub struct AvmStringRepr<'gc> {
meta: wptr::WStrMetadata,
// We abuse WStrMetadata to store capacity and is_interned bit.
// If a string is Dependent, the capacity should always be 0.
// If a string is Static or Dependent, the capacity should always be 0.
capacity: Cell<wptr::WStrMetadata>,
// If a string is Dependent, this should always be 0.
// If a string is Static or Dependent, this should always be 0.
// If a string is Owned, this indicates used chars, including dependents.
// Example: assume a string a="abc" has 10 bytes of capacity (chars_used=3).
// Then, with a+"d", we produce a dependent string and owner's chars_used becomes 4.
// len <= chars_used <= capacity.
chars_used: Cell<u32>,
// If Some, the string is dependent. The owner is assumed to be non-dynamic.
// If Some, the string is Dependent. The owner is assumed to be non-dynamic.
owner: Option<Gc<'gc, Self>>,
}
@ -46,33 +46,31 @@ impl<'gc> AvmStringRepr<'gc> {
}
}
pub fn new_dependent(s: Gc<'gc, Self>, start: usize, end: usize) -> Self {
let wstr = &s[start..end];
let wstr_ptr = wstr as *const WStr;
pub fn from_raw_static(s: &'static WStr, interned: bool) -> Self {
// SAFETY: 'wstr' is a static WStr and doesn't require an owner to stay valid
unsafe { Self::new_dependent_raw(None, s, interned) }
}
Self {
owner: Some(s.owner().unwrap_or(s)),
ptr: wstr_ptr as *mut WStr as *mut (),
meta: unsafe { wptr::WStrMetadata::of(wstr_ptr) },
chars_used: Cell::new(0),
// Dependent strings are never interned
capacity: Cell::new(wptr::WStrMetadata::new32(0, false)),
}
pub fn new_dependent(s: Gc<'gc, Self>, start: usize, end: usize) -> Self {
let wstr = &s.as_ref()[start..end];
let owner = Some(s.owner().unwrap_or(s));
// Dependent strings are never interned
let interned = false;
// SAFETY: 'wstr' is a WStr pointing into 'owner'
unsafe { Self::new_dependent_raw(owner, wstr, interned) }
}
unsafe fn new_dependent_raw(
owner: Gc<'gc, Self>,
ptr: *const u8,
length: u32,
is_wide: bool,
owner: Option<Gc<'gc, Self>>,
wstr: &'gc WStr,
interned: bool,
) -> Self {
Self {
owner: Some(owner),
ptr: ptr as *mut (),
meta: wptr::WStrMetadata::new32(length, is_wide),
owner,
ptr: wstr as *const WStr as *mut (),
meta: wptr::WStrMetadata::of(wstr),
chars_used: Cell::new(0),
// Dependent strings are never interned
capacity: Cell::new(wptr::WStrMetadata::new32(0, false)),
capacity: Cell::new(wptr::WStrMetadata::new32(0, interned)),
}
}
@ -139,8 +137,12 @@ impl<'gc> AvmStringRepr<'gc> {
panic_on_invalid_length(new_len);
}
let ret =
Self::new_dependent_raw(left_origin, left_ptr, new_len as u32, left.is_wide());
let new_wstr = wptr::from_raw_parts(
left_ptr as *const (),
wptr::WStrMetadata::new(new_len, left.is_wide()),
);
// Dependent strings are never interned.
let ret = Self::new_dependent_raw(Some(left_origin), &*new_wstr, false);
return Some(ret);
}
}
@ -168,7 +170,7 @@ impl<'gc> AvmStringRepr<'gc> {
self.capacity.get().is_wide()
}
pub fn mark_interned(&self) {
pub(crate) fn mark_interned(&self) {
if self.is_dependent() {
panic!("bug: we interned a dependent string");
}
@ -180,12 +182,15 @@ impl<'gc> AvmStringRepr<'gc> {
impl<'gc> Drop for AvmStringRepr<'gc> {
fn drop(&mut self) {
if self.owner.is_none() {
let cap = self.capacity.get().len32();
if cap > 0 {
// SAFETY: we drop the `WString` we logically own.
unsafe {
let cap = self.capacity.get().len32();
let _ = WString::from_raw_parts(self.ptr, self.meta, cap);
}
debug_assert!(self.owner.is_none());
let _ = unsafe { WString::from_raw_parts(self.ptr, self.meta, cap) };
} else {
// Nothing to do, this is a Static or a Dependant string.
// It could also have been an empty owned WString, but
// these don't need to be dropped either.
}
}
}