From 011cdd96baceccd26f05cdcd5d749e0fe9321223 Mon Sep 17 00:00:00 2001 From: Moulins Date: Fri, 24 Mar 2023 23:50:21 +0100 Subject: [PATCH] core: don't keep strong references to interned strings in the interner. This is done by implementing a simple WeakSet which clears its stale entries during tracing. --- Cargo.lock | 13 +- Cargo.toml | 2 +- core/Cargo.toml | 4 +- core/src/avm2/script.rs | 50 ++++--- core/src/string.rs | 33 +++- core/src/string/avm_string.rs | 24 ++- core/src/string/interner.rs | 273 ++++++++++++++++++++++++++++++---- 7 files changed, 334 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 982ee6a7b..aad97d4d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1611,15 +1611,16 @@ dependencies = [ [[package]] name = "gc-arena" version = "0.2.2" -source = "git+https://github.com/kyren/gc-arena?rev=1a6310c0d5c98836fa9efb1c4773038ecfd5a92e#1a6310c0d5c98836fa9efb1c4773038ecfd5a92e" +source = "git+https://github.com/kyren/gc-arena?rev=fcc8764362d25f8724912dd7f09f2405779ec053#fcc8764362d25f8724912dd7f09f2405779ec053" dependencies = [ "gc-arena-derive", + "sptr", ] [[package]] name = "gc-arena-derive" version = "0.2.2" -source = "git+https://github.com/kyren/gc-arena?rev=1a6310c0d5c98836fa9efb1c4773038ecfd5a92e#1a6310c0d5c98836fa9efb1c4773038ecfd5a92e" +source = "git+https://github.com/kyren/gc-arena?rev=fcc8764362d25f8724912dd7f09f2405779ec053#fcc8764362d25f8724912dd7f09f2405779ec053" dependencies = [ "proc-macro2", "quote", @@ -3458,6 +3459,7 @@ dependencies = [ "futures", "gc-arena", "generational-arena", + "hashbrown 0.13.2", "indexmap", "instant", "linkme", @@ -3475,6 +3477,7 @@ dependencies = [ "ruffle_render", "ruffle_video", "ruffle_wstr", + "scopeguard", "serde", "serde_json", "smallvec", @@ -4036,6 +4039,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index c3d25e662..5bcfcba7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ repository = "https://github.com/ruffle-rs/ruffle" version = "0.1.0" [workspace.dependencies] -gc-arena = { git = "https://github.com/kyren/gc-arena", rev = "1a6310c0d5c98836fa9efb1c4773038ecfd5a92e" } +gc-arena = { git = "https://github.com/kyren/gc-arena", rev = "fcc8764362d25f8724912dd7f09f2405779ec053" } # Don't optimize build scripts and macros. [profile.release.build-override] diff --git a/core/Cargo.toml b/core/Cargo.toml index 48d05bc51..c2bfa6395 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -17,7 +17,7 @@ gc-arena = { workspace = true } generational-arena = "0.2.8" indexmap = "1.9.3" tracing = "0.1.37" -ruffle_render = { path = "../render" } +ruffle_render = { path = "../render", features = ["tessellator"] } ruffle_video = { path = "../video" } ruffle_macros = { path = "macros" } ruffle_wstr = { path = "../wstr" } @@ -49,6 +49,8 @@ bytemuck = "1.13.1" clap = { version = "4.2.4", features = ["derive"], optional=true } realfft = "3.2.0" once_cell = "1.17.1" +hashbrown = { version = "0.13.2", features = ["raw"] } +scopeguard = "1.1.0" [target.'cfg(not(target_family = "wasm"))'.dependencies.futures] version = "0.3.28" diff --git a/core/src/avm2/script.rs b/core/src/avm2/script.rs index d20599f1a..7c4a89c0a 100644 --- a/core/src/avm2/script.rs +++ b/core/src/avm2/script.rs @@ -62,7 +62,7 @@ struct TranslationUnitData<'gc> { scripts: Vec>>, /// All strings loaded from the ABC's strings list. - /// They're lazy loaded and offset by 1, with the 0th element being always None. + /// They're lazy loaded and offset by 1, with the 0th element being always the empty string. strings: Vec>>, /// All namespaces loaded from the ABC's scripts list. @@ -233,28 +233,11 @@ impl<'gc> TranslationUnit<'gc> { string_index: u32, context: &mut GcContext<'_, 'gc>, ) -> Result>, Error<'gc>> { - let mut write = self.0.write(context.gc_context); - if let Some(Some(string)) = write.strings.get(string_index as usize) { - return Ok(Some(*string)); - } - if string_index == 0 { - return Ok(None); + Ok(None) + } else { + self.pool_string(string_index, context).map(Some) } - - let raw = write - .abc - .constant_pool - .strings - .get(string_index as usize - 1) - .ok_or_else(|| format!("Unknown string constant {string_index}"))?; - - let avm_string = context - .interner - .intern_wstr(context.gc_context, ruffle_wstr::from_utf8(raw)); - - write.strings[string_index as usize] = Some(avm_string); - Ok(Some(avm_string)) } /// Load a string from the ABC's constant pool. @@ -268,9 +251,28 @@ impl<'gc> TranslationUnit<'gc> { string_index: u32, context: &mut GcContext<'_, 'gc>, ) -> Result, Error<'gc>> { - Ok(self - .pool_string_option(string_index, context)? - .unwrap_or_else(AvmString::default)) + let mut write = self.0.write(context.gc_context); + if let Some(Some(string)) = write.strings.get(string_index as usize) { + return Ok(*string); + } + + let raw = if string_index == 0 { + "" + } else { + write + .abc + .constant_pool + .strings + .get(string_index as usize - 1) + .ok_or_else(|| format!("Unknown string constant {string_index}"))? + }; + + let avm_string = context + .interner + .intern_wstr(context.gc_context, ruffle_wstr::from_utf8(raw)); + + write.strings[string_index as usize] = Some(avm_string); + Ok(avm_string) } /// Retrieve a static, or non-runtime, multiname from the current constant diff --git a/core/src/string.rs b/core/src/string.rs index 5fffab0a3..85f811d05 100644 --- a/core/src/string.rs +++ b/core/src/string.rs @@ -1,4 +1,7 @@ -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; +use std::ops::Deref; + +use gc_arena::Collect; mod avm_string; mod interner; @@ -21,3 +24,31 @@ impl SwfStrExt for swf::SwfStr { } } } + +/// This type only exists because `WString` doesn't implement `Collect` +#[derive(Collect, Eq, PartialEq, Hash)] +#[collect(require_static)] +struct OwnedWStr(WString); + +impl Deref for OwnedWStr { + type Target = WStr; + + #[inline(always)] + fn deref(&self) -> &WStr { + &self.0 + } +} + +impl Borrow for OwnedWStr { + #[inline(always)] + fn borrow(&self) -> &WStr { + &self.0 + } +} + +impl Default for OwnedWStr { + #[inline(always)] + fn default() -> Self { + OwnedWStr(WString::new()) + } +} diff --git a/core/src/string/avm_string.rs b/core/src/string/avm_string.rs index fd4fce606..a84579cd2 100644 --- a/core/src/string/avm_string.rs +++ b/core/src/string/avm_string.rs @@ -1,9 +1,10 @@ -use ruffle_wstr::{wstr_impl_traits, WStr, WString}; - +use std::borrow::Cow; use std::ops::Deref; use gc_arena::{Collect, Gc, MutationContext}; -use std::borrow::Cow; +use ruffle_wstr::{wstr_impl_traits, WStr, WString}; + +use crate::string::OwnedWStr; #[derive(Clone, Copy, Collect)] #[collect(no_drop)] @@ -12,10 +13,6 @@ enum Source<'gc> { Static(&'static WStr), } -#[derive(Collect)] -#[collect(require_static)] -struct OwnedWStr(WString); - #[derive(Clone, Copy, Collect)] #[collect(no_drop)] pub struct AvmString<'gc> { @@ -23,6 +20,19 @@ pub struct AvmString<'gc> { } impl<'gc> AvmString<'gc> { + pub(super) fn from_owned(atom: Gc<'gc, OwnedWStr>) -> Self { + Self { + source: Source::Owned(atom), + } + } + + pub(super) fn to_owned(self, gc_context: MutationContext<'gc, '_>) -> Gc<'gc, OwnedWStr> { + match self.source { + Source::Owned(s) => s, + Source::Static(s) => Gc::allocate(gc_context, OwnedWStr(s.into())), + } + } + pub fn new_utf8<'s, S: Into>>( gc_context: MutationContext<'gc, '_>, string: S, diff --git a/core/src/string/interner.rs b/core/src/string/interner.rs index 2991403bd..a470fd3ef 100644 --- a/core/src/string/interner.rs +++ b/core/src/string/interner.rs @@ -1,16 +1,18 @@ -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; +use std::cell::Cell; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::marker::PhantomData; +use std::ops::DerefMut; -use fnv::FnvHashSet; -use gc_arena::{Collect, MutationContext}; -use ruffle_wstr::WStr; +use gc_arena::{Collect, CollectionContext, Gc, GcWeak, MutationContext}; +use hashbrown::raw::{Bucket, RawTable}; -use super::avm_string::AvmString; +use crate::string::{AvmString, OwnedWStr, WStr}; #[derive(Collect, Default)] #[collect(no_drop)] pub struct AvmStringInterner<'gc> { - // TODO(moulins): use some kind of weak map - interned: FnvHashSet>, + interned: WeakSet<'gc, OwnedWStr>, } impl<'gc> AvmStringInterner<'gc> { @@ -18,31 +20,244 @@ impl<'gc> AvmStringInterner<'gc> { Self::default() } - #[must_use] - pub fn intern_wstr<'a, S>( - &mut self, - gc_context: MutationContext<'gc, '_>, - s: S, - ) -> AvmString<'gc> - where - S: AsRef + Into>, - { - if let Some(s) = self.interned.get(s.as_ref()) { - *s - } else { - let s = AvmString::new(gc_context, s.into().into_owned()); - self.interned.insert(s); - s - } + fn alloc(mc: MutationContext<'gc, '_>, s: Cow<'_, WStr>) -> Gc<'gc, OwnedWStr> { + Gc::allocate(mc, OwnedWStr(s.into_owned())) } #[must_use] - pub fn intern(&mut self, s: AvmString<'gc>) -> AvmString<'gc> { - if let Some(s) = self.interned.get(&s) { - *s - } else { - self.interned.insert(s); - s + pub fn intern_wstr<'a, S>(&mut self, mc: MutationContext<'gc, '_>, s: S) -> AvmString<'gc> + where + S: Into>, + { + let s = s.into(); + let atom = match self.interned.entry(mc, s.as_ref()) { + (Some(atom), _) => atom, + (None, h) => self.interned.insert_fresh(mc, h, Self::alloc(mc, s)), + }; + + AvmString::from_owned(atom) + } + + #[must_use] + pub fn get<'a>(&self, mc: MutationContext<'gc, '_>, s: &WStr) -> Option> { + self.interned.get(mc, s).map(AvmString::from_owned) + } + + #[must_use] + pub fn intern(&mut self, mc: MutationContext<'gc, '_>, s: AvmString<'gc>) -> AvmString<'gc> { + let atom = match self.interned.entry(mc, s.as_wstr()) { + (Some(atom), _) => atom, + (None, h) => self.interned.insert_fresh(mc, h, s.to_owned(mc)), + }; + + AvmString::from_owned(atom) + } +} + +/// A set holding weakly to its elements. +/// +/// Stale entries get regularly cleaned up in response to memory pressure: +/// - in the tracing phase of each GC cycle; +/// - upon insertion when the set is at capacity. +#[derive(Default)] +struct WeakSet<'gc, T: 'gc> { + // Cannot use `HashSet` here, as `GcWeak` cannot implement `Hash`. + raw: CollectCell<'gc, RawTable>>, + hasher: fnv::FnvBuildHasher, +} + +struct FindResult<'gc, T: 'gc> { + value: Option>, + stale: Option>>, +} + +impl<'gc, T: 'gc> WeakSet<'gc, T> { + fn hash(build_hasher: &impl BuildHasher, key: &K) -> u64 { + let mut hasher = build_hasher.build_hasher(); + key.hash(&mut hasher); + hasher.finish() + } + + /// Finds a live, matching entry with a given hash. + /// Returns both the matching entry and the first stale bucket found along the way. + fn find_inner( + &self, + mc: MutationContext<'gc, '_>, + hash: u64, + mut eq: impl FnMut(&T) -> bool, + ) -> FindResult<'gc, T> { + let raw = self.raw.as_ref(mc); + let mut result = FindResult { + value: None, + stale: None, + }; + + // SAFETY: `iter_hash` doesn't outlive `raw`, and only returns full buckets. + unsafe { + for bucket in raw.iter_hash(hash) { + match bucket.as_ref().upgrade(mc) { + Some(strong) if eq(&strong) => { + result.value = Some(strong); + break; + } + None if result.stale.is_none() => { + result.stale = Some(bucket); + } + _ => (), + } + } + } + + result + } + + /// Finds the given key in the map. + fn get(&self, mc: MutationContext<'gc, '_>, key: &Q) -> Option> + where + T: Borrow + Hash, + Q: Hash + Eq + ?Sized, + { + let hash = Self::hash(&self.hasher, key); + let result = self.find_inner(mc, hash, |strong| strong.borrow() == key); + result.value + } + + /// Finds the given key in the map, and return its and its hash. + /// TODO: add proper entry API? + fn entry(&mut self, mc: MutationContext<'gc, '_>, key: &Q) -> (Option>, u64) + where + T: Borrow + Hash, + Q: Hash + Eq + ?Sized, + { + let hasher = &self.hasher; + let hash = Self::hash(hasher, key); + let result = self.find_inner(mc, hash, |strong| strong.borrow() == key); + + // Clear any stale bucket found; this ensures that reinserting + // a freshly pruned key does not grow the table. + if let Some(stale) = result.stale { + unsafe { self.raw.as_mut().erase(stale) } + } + + (result.value, hash) + } + + /// Inserts a new key in the set. + /// The key must not already exist, and `hash` must be its hash. + /// TODO: add proper entry API? + fn insert_fresh( + &mut self, + mc: MutationContext<'gc, '_>, + hash: u64, + key: Gc<'gc, T>, + ) -> Gc<'gc, T> + where + T: Hash, + { + let weak = Gc::downgrade(key); + let raw = self.raw.as_mut(); + let hasher = &self.hasher; + + if raw.try_insert_no_grow(hash, weak).is_err() { + Self::prune_and_grow(raw, |w| w.upgrade(mc), |k| Self::hash(hasher, &**k)); + raw.try_insert_no_grow(hash, weak) + .expect("failed to grow table"); + } + + key + } + + /// Prune stale entries and resize the table to ensure at least one extra entry can be added. + #[cold] + fn prune_and_grow( + raw: &mut RawTable, + upgrade: impl Fn(&B) -> Option, + hasher: impl Fn(&K) -> u64, + ) { + // We *really* don't want to reallocate, so try to prune dead references first. + let all = raw.len(); + Self::retain(raw, |b| upgrade(b).is_some()); + let remaining = raw.len(); + + // Only reallocate if few entries were pruned. + if remaining >= all / 2 { + raw.reserve(all - remaining + 1, |b| match upgrade(b) { + Some(k) => hasher(&k), + None => unreachable!("unexpected stale entry"), + }) + } + } + + /// Filters the entries of a raw table. + fn retain(raw: &mut RawTable, mut f: impl FnMut(&mut B) -> bool) { + // SAFETY: `iter` doesn't outlive `raw`, and only return full buckets. + unsafe { + for bucket in raw.iter() { + if !f(bucket.as_mut()) { + raw.erase(bucket); + } + } } } } + +unsafe impl<'gc, T> Collect for WeakSet<'gc, T> { + fn trace(&self, cc: CollectionContext) { + // Prune entries known to be dead. + // Safe, as we never pick up new GC pointers from outside this allocation. + let mut guard = unsafe { self.raw.steal_for_trace() }; + Self::retain(&mut *guard, |weak| { + let keep = !weak.is_dropped(); + if keep { + // NOTE: The explicit dereference is necessary to not + // use the no-op `Collect` impl on references. + (*weak).trace(cc); + } + keep + }); + } +} + +/// Small utility to work-around the fact that `Collect::trace` only takes `&self`. +#[derive(Default)] +struct CollectCell<'gc, T> { + inner: Cell, + _marker: PhantomData>, +} + +impl<'gc, T> CollectCell<'gc, T> { + #[inline(always)] + fn as_ref<'a>(&'a self, _mc: MutationContext<'gc, '_>) -> &'a T { + unsafe { &*self.inner.as_ptr() } + } + + #[inline(always)] + fn as_mut(&mut self) -> &mut T { + self.inner.get_mut() + } + + /// SAFETY: must be called inside a `Collect::trace` function. + /// + /// An alternative would be to require a `CollectionContext` argument, but this is + /// still unsound in presence of nested arenas (preventing this would require a `'gc` + /// lifetime on `CollectionContext` and `Collect`): + /// + /// ```rs,ignore + /// fn trace(&self, cc: CollectionContext) { + /// rootless_arena(|mc| { + /// let cell = CollectCell::::default(); + /// let borrow: &i32 = dbg!(cell.as_ref(mc)); // 0 + /// *cell.steal_for_trace(cc) = 1; + /// dbg!(borrow); // 1 - oh no! + /// }); + /// } + /// ``` + #[inline(always)] + unsafe fn steal_for_trace(&self) -> impl DerefMut + '_ + where + T: Default, + { + let cell = &self.inner; + scopeguard::guard(cell.take(), |stolen| cell.set(stolen)) + } +}