core: don't keep strong references to interned strings in the interner.

This is done by implementing a simple WeakSet which clears its stale
entries during tracing.
This commit is contained in:
Moulins 2023-03-24 23:50:21 +01:00 committed by Nathan Adams
parent 889a9bd009
commit 011cdd96ba
7 changed files with 334 additions and 65 deletions

13
Cargo.lock generated
View File

@ -1611,15 +1611,16 @@ dependencies = [
[[package]]
name = "gc-arena"
version = "0.2.2"
source = "git+https://github.com/kyren/gc-arena?rev=1a6310c0d5c98836fa9efb1c4773038ecfd5a92e#1a6310c0d5c98836fa9efb1c4773038ecfd5a92e"
source = "git+https://github.com/kyren/gc-arena?rev=fcc8764362d25f8724912dd7f09f2405779ec053#fcc8764362d25f8724912dd7f09f2405779ec053"
dependencies = [
"gc-arena-derive",
"sptr",
]
[[package]]
name = "gc-arena-derive"
version = "0.2.2"
source = "git+https://github.com/kyren/gc-arena?rev=1a6310c0d5c98836fa9efb1c4773038ecfd5a92e#1a6310c0d5c98836fa9efb1c4773038ecfd5a92e"
source = "git+https://github.com/kyren/gc-arena?rev=fcc8764362d25f8724912dd7f09f2405779ec053#fcc8764362d25f8724912dd7f09f2405779ec053"
dependencies = [
"proc-macro2",
"quote",
@ -3458,6 +3459,7 @@ dependencies = [
"futures",
"gc-arena",
"generational-arena",
"hashbrown 0.13.2",
"indexmap",
"instant",
"linkme",
@ -3475,6 +3477,7 @@ dependencies = [
"ruffle_render",
"ruffle_video",
"ruffle_wstr",
"scopeguard",
"serde",
"serde_json",
"smallvec",
@ -4036,6 +4039,12 @@ dependencies = [
"num-traits",
]
[[package]]
name = "sptr"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a"
[[package]]
name = "static_assertions"
version = "1.1.0"

View File

@ -35,7 +35,7 @@ repository = "https://github.com/ruffle-rs/ruffle"
version = "0.1.0"
[workspace.dependencies]
gc-arena = { git = "https://github.com/kyren/gc-arena", rev = "1a6310c0d5c98836fa9efb1c4773038ecfd5a92e" }
gc-arena = { git = "https://github.com/kyren/gc-arena", rev = "fcc8764362d25f8724912dd7f09f2405779ec053" }
# Don't optimize build scripts and macros.
[profile.release.build-override]

View File

@ -17,7 +17,7 @@ gc-arena = { workspace = true }
generational-arena = "0.2.8"
indexmap = "1.9.3"
tracing = "0.1.37"
ruffle_render = { path = "../render" }
ruffle_render = { path = "../render", features = ["tessellator"] }
ruffle_video = { path = "../video" }
ruffle_macros = { path = "macros" }
ruffle_wstr = { path = "../wstr" }
@ -49,6 +49,8 @@ bytemuck = "1.13.1"
clap = { version = "4.2.4", features = ["derive"], optional=true }
realfft = "3.2.0"
once_cell = "1.17.1"
hashbrown = { version = "0.13.2", features = ["raw"] }
scopeguard = "1.1.0"
[target.'cfg(not(target_family = "wasm"))'.dependencies.futures]
version = "0.3.28"

View File

@ -62,7 +62,7 @@ struct TranslationUnitData<'gc> {
scripts: Vec<Option<Script<'gc>>>,
/// All strings loaded from the ABC's strings list.
/// They're lazy loaded and offset by 1, with the 0th element being always None.
/// They're lazy loaded and offset by 1, with the 0th element being always the empty string.
strings: Vec<Option<AvmString<'gc>>>,
/// All namespaces loaded from the ABC's scripts list.
@ -233,28 +233,11 @@ impl<'gc> TranslationUnit<'gc> {
string_index: u32,
context: &mut GcContext<'_, 'gc>,
) -> Result<Option<AvmString<'gc>>, Error<'gc>> {
let mut write = self.0.write(context.gc_context);
if let Some(Some(string)) = write.strings.get(string_index as usize) {
return Ok(Some(*string));
}
if string_index == 0 {
return Ok(None);
Ok(None)
} else {
self.pool_string(string_index, context).map(Some)
}
let raw = write
.abc
.constant_pool
.strings
.get(string_index as usize - 1)
.ok_or_else(|| format!("Unknown string constant {string_index}"))?;
let avm_string = context
.interner
.intern_wstr(context.gc_context, ruffle_wstr::from_utf8(raw));
write.strings[string_index as usize] = Some(avm_string);
Ok(Some(avm_string))
}
/// Load a string from the ABC's constant pool.
@ -268,9 +251,28 @@ impl<'gc> TranslationUnit<'gc> {
string_index: u32,
context: &mut GcContext<'_, 'gc>,
) -> Result<AvmString<'gc>, Error<'gc>> {
Ok(self
.pool_string_option(string_index, context)?
.unwrap_or_else(AvmString::default))
let mut write = self.0.write(context.gc_context);
if let Some(Some(string)) = write.strings.get(string_index as usize) {
return Ok(*string);
}
let raw = if string_index == 0 {
""
} else {
write
.abc
.constant_pool
.strings
.get(string_index as usize - 1)
.ok_or_else(|| format!("Unknown string constant {string_index}"))?
};
let avm_string = context
.interner
.intern_wstr(context.gc_context, ruffle_wstr::from_utf8(raw));
write.strings[string_index as usize] = Some(avm_string);
Ok(avm_string)
}
/// Retrieve a static, or non-runtime, multiname from the current constant

View File

@ -1,4 +1,7 @@
use std::borrow::Cow;
use std::borrow::{Borrow, Cow};
use std::ops::Deref;
use gc_arena::Collect;
mod avm_string;
mod interner;
@ -21,3 +24,31 @@ impl SwfStrExt for swf::SwfStr {
}
}
}
/// This type only exists because `WString` doesn't implement `Collect`
#[derive(Collect, Eq, PartialEq, Hash)]
#[collect(require_static)]
struct OwnedWStr(WString);
impl Deref for OwnedWStr {
type Target = WStr;
#[inline(always)]
fn deref(&self) -> &WStr {
&self.0
}
}
impl Borrow<WStr> for OwnedWStr {
#[inline(always)]
fn borrow(&self) -> &WStr {
&self.0
}
}
impl Default for OwnedWStr {
#[inline(always)]
fn default() -> Self {
OwnedWStr(WString::new())
}
}

View File

@ -1,9 +1,10 @@
use ruffle_wstr::{wstr_impl_traits, WStr, WString};
use std::borrow::Cow;
use std::ops::Deref;
use gc_arena::{Collect, Gc, MutationContext};
use std::borrow::Cow;
use ruffle_wstr::{wstr_impl_traits, WStr, WString};
use crate::string::OwnedWStr;
#[derive(Clone, Copy, Collect)]
#[collect(no_drop)]
@ -12,10 +13,6 @@ enum Source<'gc> {
Static(&'static WStr),
}
#[derive(Collect)]
#[collect(require_static)]
struct OwnedWStr(WString);
#[derive(Clone, Copy, Collect)]
#[collect(no_drop)]
pub struct AvmString<'gc> {
@ -23,6 +20,19 @@ pub struct AvmString<'gc> {
}
impl<'gc> AvmString<'gc> {
pub(super) fn from_owned(atom: Gc<'gc, OwnedWStr>) -> Self {
Self {
source: Source::Owned(atom),
}
}
pub(super) fn to_owned(self, gc_context: MutationContext<'gc, '_>) -> Gc<'gc, OwnedWStr> {
match self.source {
Source::Owned(s) => s,
Source::Static(s) => Gc::allocate(gc_context, OwnedWStr(s.into())),
}
}
pub fn new_utf8<'s, S: Into<Cow<'s, str>>>(
gc_context: MutationContext<'gc, '_>,
string: S,

View File

@ -1,16 +1,18 @@
use std::borrow::Cow;
use std::borrow::{Borrow, Cow};
use std::cell::Cell;
use std::hash::{BuildHasher, Hash, Hasher};
use std::marker::PhantomData;
use std::ops::DerefMut;
use fnv::FnvHashSet;
use gc_arena::{Collect, MutationContext};
use ruffle_wstr::WStr;
use gc_arena::{Collect, CollectionContext, Gc, GcWeak, MutationContext};
use hashbrown::raw::{Bucket, RawTable};
use super::avm_string::AvmString;
use crate::string::{AvmString, OwnedWStr, WStr};
#[derive(Collect, Default)]
#[collect(no_drop)]
pub struct AvmStringInterner<'gc> {
// TODO(moulins): use some kind of weak map
interned: FnvHashSet<AvmString<'gc>>,
interned: WeakSet<'gc, OwnedWStr>,
}
impl<'gc> AvmStringInterner<'gc> {
@ -18,31 +20,244 @@ impl<'gc> AvmStringInterner<'gc> {
Self::default()
}
#[must_use]
pub fn intern_wstr<'a, S>(
&mut self,
gc_context: MutationContext<'gc, '_>,
s: S,
) -> AvmString<'gc>
where
S: AsRef<WStr> + Into<Cow<'a, WStr>>,
{
if let Some(s) = self.interned.get(s.as_ref()) {
*s
} else {
let s = AvmString::new(gc_context, s.into().into_owned());
self.interned.insert(s);
s
}
fn alloc(mc: MutationContext<'gc, '_>, s: Cow<'_, WStr>) -> Gc<'gc, OwnedWStr> {
Gc::allocate(mc, OwnedWStr(s.into_owned()))
}
#[must_use]
pub fn intern(&mut self, s: AvmString<'gc>) -> AvmString<'gc> {
if let Some(s) = self.interned.get(&s) {
*s
} else {
self.interned.insert(s);
s
pub fn intern_wstr<'a, S>(&mut self, mc: MutationContext<'gc, '_>, s: S) -> AvmString<'gc>
where
S: Into<Cow<'a, WStr>>,
{
let s = s.into();
let atom = match self.interned.entry(mc, s.as_ref()) {
(Some(atom), _) => atom,
(None, h) => self.interned.insert_fresh(mc, h, Self::alloc(mc, s)),
};
AvmString::from_owned(atom)
}
#[must_use]
pub fn get<'a>(&self, mc: MutationContext<'gc, '_>, s: &WStr) -> Option<AvmString<'gc>> {
self.interned.get(mc, s).map(AvmString::from_owned)
}
#[must_use]
pub fn intern(&mut self, mc: MutationContext<'gc, '_>, s: AvmString<'gc>) -> AvmString<'gc> {
let atom = match self.interned.entry(mc, s.as_wstr()) {
(Some(atom), _) => atom,
(None, h) => self.interned.insert_fresh(mc, h, s.to_owned(mc)),
};
AvmString::from_owned(atom)
}
}
/// A set holding weakly to its elements.
///
/// Stale entries get regularly cleaned up in response to memory pressure:
/// - in the tracing phase of each GC cycle;
/// - upon insertion when the set is at capacity.
#[derive(Default)]
struct WeakSet<'gc, T: 'gc> {
// Cannot use `HashSet` here, as `GcWeak` cannot implement `Hash`.
raw: CollectCell<'gc, RawTable<GcWeak<'gc, T>>>,
hasher: fnv::FnvBuildHasher,
}
struct FindResult<'gc, T: 'gc> {
value: Option<Gc<'gc, T>>,
stale: Option<Bucket<GcWeak<'gc, T>>>,
}
impl<'gc, T: 'gc> WeakSet<'gc, T> {
fn hash<K: Hash + ?Sized>(build_hasher: &impl BuildHasher, key: &K) -> u64 {
let mut hasher = build_hasher.build_hasher();
key.hash(&mut hasher);
hasher.finish()
}
/// Finds a live, matching entry with a given hash.
/// Returns both the matching entry and the first stale bucket found along the way.
fn find_inner(
&self,
mc: MutationContext<'gc, '_>,
hash: u64,
mut eq: impl FnMut(&T) -> bool,
) -> FindResult<'gc, T> {
let raw = self.raw.as_ref(mc);
let mut result = FindResult {
value: None,
stale: None,
};
// SAFETY: `iter_hash` doesn't outlive `raw`, and only returns full buckets.
unsafe {
for bucket in raw.iter_hash(hash) {
match bucket.as_ref().upgrade(mc) {
Some(strong) if eq(&strong) => {
result.value = Some(strong);
break;
}
None if result.stale.is_none() => {
result.stale = Some(bucket);
}
_ => (),
}
}
}
result
}
/// Finds the given key in the map.
fn get<Q>(&self, mc: MutationContext<'gc, '_>, key: &Q) -> Option<Gc<'gc, T>>
where
T: Borrow<Q> + Hash,
Q: Hash + Eq + ?Sized,
{
let hash = Self::hash(&self.hasher, key);
let result = self.find_inner(mc, hash, |strong| strong.borrow() == key);
result.value
}
/// Finds the given key in the map, and return its and its hash.
/// TODO: add proper entry API?
fn entry<Q>(&mut self, mc: MutationContext<'gc, '_>, key: &Q) -> (Option<Gc<'gc, T>>, u64)
where
T: Borrow<Q> + Hash,
Q: Hash + Eq + ?Sized,
{
let hasher = &self.hasher;
let hash = Self::hash(hasher, key);
let result = self.find_inner(mc, hash, |strong| strong.borrow() == key);
// Clear any stale bucket found; this ensures that reinserting
// a freshly pruned key does not grow the table.
if let Some(stale) = result.stale {
unsafe { self.raw.as_mut().erase(stale) }
}
(result.value, hash)
}
/// Inserts a new key in the set.
/// The key must not already exist, and `hash` must be its hash.
/// TODO: add proper entry API?
fn insert_fresh(
&mut self,
mc: MutationContext<'gc, '_>,
hash: u64,
key: Gc<'gc, T>,
) -> Gc<'gc, T>
where
T: Hash,
{
let weak = Gc::downgrade(key);
let raw = self.raw.as_mut();
let hasher = &self.hasher;
if raw.try_insert_no_grow(hash, weak).is_err() {
Self::prune_and_grow(raw, |w| w.upgrade(mc), |k| Self::hash(hasher, &**k));
raw.try_insert_no_grow(hash, weak)
.expect("failed to grow table");
}
key
}
/// Prune stale entries and resize the table to ensure at least one extra entry can be added.
#[cold]
fn prune_and_grow<K, B>(
raw: &mut RawTable<B>,
upgrade: impl Fn(&B) -> Option<K>,
hasher: impl Fn(&K) -> u64,
) {
// We *really* don't want to reallocate, so try to prune dead references first.
let all = raw.len();
Self::retain(raw, |b| upgrade(b).is_some());
let remaining = raw.len();
// Only reallocate if few entries were pruned.
if remaining >= all / 2 {
raw.reserve(all - remaining + 1, |b| match upgrade(b) {
Some(k) => hasher(&k),
None => unreachable!("unexpected stale entry"),
})
}
}
/// Filters the entries of a raw table.
fn retain<B>(raw: &mut RawTable<B>, mut f: impl FnMut(&mut B) -> bool) {
// SAFETY: `iter` doesn't outlive `raw`, and only return full buckets.
unsafe {
for bucket in raw.iter() {
if !f(bucket.as_mut()) {
raw.erase(bucket);
}
}
}
}
}
unsafe impl<'gc, T> Collect for WeakSet<'gc, T> {
fn trace(&self, cc: CollectionContext) {
// Prune entries known to be dead.
// Safe, as we never pick up new GC pointers from outside this allocation.
let mut guard = unsafe { self.raw.steal_for_trace() };
Self::retain(&mut *guard, |weak| {
let keep = !weak.is_dropped();
if keep {
// NOTE: The explicit dereference is necessary to not
// use the no-op `Collect` impl on references.
(*weak).trace(cc);
}
keep
});
}
}
/// Small utility to work-around the fact that `Collect::trace` only takes `&self`.
#[derive(Default)]
struct CollectCell<'gc, T> {
inner: Cell<T>,
_marker: PhantomData<Gc<'gc, T>>,
}
impl<'gc, T> CollectCell<'gc, T> {
#[inline(always)]
fn as_ref<'a>(&'a self, _mc: MutationContext<'gc, '_>) -> &'a T {
unsafe { &*self.inner.as_ptr() }
}
#[inline(always)]
fn as_mut(&mut self) -> &mut T {
self.inner.get_mut()
}
/// SAFETY: must be called inside a `Collect::trace` function.
///
/// An alternative would be to require a `CollectionContext` argument, but this is
/// still unsound in presence of nested arenas (preventing this would require a `'gc`
/// lifetime on `CollectionContext` and `Collect`):
///
/// ```rs,ignore
/// fn trace(&self, cc: CollectionContext) {
/// rootless_arena(|mc| {
/// let cell = CollectCell::<i32>::default();
/// let borrow: &i32 = dbg!(cell.as_ref(mc)); // 0
/// *cell.steal_for_trace(cc) = 1;
/// dbg!(borrow); // 1 - oh no!
/// });
/// }
/// ```
#[inline(always)]
unsafe fn steal_for_trace(&self) -> impl DerefMut<Target = T> + '_
where
T: Default,
{
let cell = &self.inner;
scopeguard::guard(cell.take(), |stolen| cell.set(stolen))
}
}