core: Support dependent strings for concatenation

This commit is contained in:
Adrian Wielgosik 2024-05-13 18:22:01 +02:00 committed by Adrian Wielgosik
parent 3c944e35c7
commit d6dbc5e767
5 changed files with 154 additions and 9 deletions

View File

@ -172,13 +172,15 @@ fn concat<'gc>(
this: Object<'gc>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error<'gc>> {
let mut ret = WString::from(Value::from(this).coerce_to_string(activation)?.as_wstr());
let lhs = Value::from(this).coerce_to_string(activation)?;
let mut ret = lhs;
for arg in args {
let s = arg.coerce_to_string(activation)?;
ret.push_str(&s);
ret = AvmString::concat(activation.context.gc_context, ret, s);
}
Ok(AvmString::new(activation.context.gc_context, ret).into())
Ok(ret.into())
}
/// Implements `String.fromCharCode`

View File

@ -9,6 +9,8 @@ use crate::string::{AvmAtom, AvmStringRepr};
/// Where the contents of an `AvmString` are stored (this is the type of its `source` field).
#[derive(Clone, Copy, Collect)]
#[collect(no_drop)]
enum Source<'gc> {
// TODO: Rename this to `Managed`, to avoid
// ambiguity with dependent/owned/owner terms.
/// The string is backed by a GC-allocated `AvmStringRepr`.
Owned(Gc<'gc, AvmStringRepr<'gc>>),
/// The string is backed by a `&'static WStr`.
Static(&'static WStr),
}
@ -38,6 +40,13 @@ impl<'gc> AvmString<'gc> {
}
}
/// Returns the GC-allocated representation behind this string.
///
/// Yields `None` when the string is backed by a `'static` `WStr` instead
/// (i.e. `Source::Static`).
pub fn as_managed(self) -> Option<Gc<'gc, AvmStringRepr<'gc>>> {
    if let Source::Owned(repr) = self.source {
        Some(repr)
    } else {
        None
    }
}
pub fn new_utf8<'s, S: Into<Cow<'s, str>>>(gc_context: &Mutation<'gc>, string: S) -> Self {
let buf = match string.into() {
Cow::Owned(utf8) => WString::from_utf8_owned(utf8),
@ -105,7 +114,28 @@ impl<'gc> AvmString<'gc> {
} else if right.is_empty() {
left
} else {
let mut out = WString::from(left.as_wstr());
if let Some(repr) = AvmStringRepr::try_append_inline(left, &right) {
return Self {
source: Source::Owned(Gc::new(gc_context, repr)),
};
}
// When doing a non-in-place append, overallocate a bit so that
// further appends can be done in-place.
// (Note that this means that every first-time append ends up here and
// overallocates, even if the string is only ever appended to once.)
// This growth logic should be equivalent to AVM's, except that the growth
// is capped at 1MB instead of 4MB.
let new_size = left.len() + right.len();
let new_capacity = if new_size < 32 {
32
} else if new_size > 1024 * 1024 {
new_size + 1024 * 1024
} else {
new_size * 2
};
let mut out = WString::with_capacity(new_capacity, left.is_wide() || right.is_wide());
out.push_str(&left);
out.push_str(&right);
Self::new(gc_context, out)
}

View File

@ -2,7 +2,7 @@ use std::cell::Cell;
use std::ops::Deref;
use gc_arena::Collect;
use ruffle_wstr::{ptr as wptr, wstr_impl_traits, WStr, WString};
use ruffle_wstr::{panic_on_invalid_length, ptr as wptr, wstr_impl_traits, WStr, WString};
use crate::string::avm_string::AvmString;
@ -16,13 +16,23 @@ pub struct AvmStringRepr<'gc> {
#[collect(require_static)]
ptr: *mut (),
// Length and is_wide bit.
#[collect(require_static)]
meta: wptr::WStrMetadata,
// We abuse the 'is_wide' bit for interning.
// We abuse WStrMetadata to store capacity and is_interned bit.
// If a string is Dependent, the capacity should always be 0.
#[collect(require_static)]
capacity: Cell<wptr::WStrMetadata>,
// If a string is Dependent, this should always be 0.
// If a string is Owned, this indicates used chars, including dependents.
// Example: assume a string a="abc" has 10 bytes of capacity (chars_used=3).
// Then, with a+"d", we produce a dependent string and owner's chars_used becomes 4.
// len <= chars_used <= capacity.
#[collect(require_static)]
chars_used: Cell<u32>,
// If Some, the string is dependent. The owner is assumed to be non-dynamic.
owner: Option<AvmString<'gc>>,
}
@ -35,6 +45,7 @@ impl<'gc> AvmStringRepr<'gc> {
ptr,
meta,
capacity,
chars_used: Cell::new(meta.len32()),
owner: None,
}
}
@ -45,7 +56,7 @@ impl<'gc> AvmStringRepr<'gc> {
let meta = unsafe { wptr::WStrMetadata::of(wstr_ptr) };
// Dependent strings are never interned
let capacity = Cell::new(wptr::WStrMetadata::new32(meta.len32(), false));
let capacity = Cell::new(wptr::WStrMetadata::new32(0, false));
let ptr = wstr_ptr as *mut WStr as *mut ();
let owner = if let Some(owner) = s.owner() {
@ -58,10 +69,112 @@ impl<'gc> AvmStringRepr<'gc> {
owner: Some(owner),
ptr,
meta,
chars_used: Cell::new(0),
capacity,
}
}
/// Builds a dependent string representation from raw parts.
///
/// The result records `owner` as its owner, starts at `ptr`, and carries
/// `length` / `is_wide` as its metadata. Its own `capacity` and `chars_used`
/// are both zero, as for every dependent string.
///
/// # Safety
///
/// Nothing is validated here. Presumably `ptr` must point at `length` code
/// units of the given width that live inside `owner`'s buffer for as long as
/// `owner` is alive — TODO confirm against the callers.
unsafe fn new_dependent_raw(
    owner: AvmString<'gc>,
    ptr: *const u8,
    length: u32,
    is_wide: bool,
) -> Self {
    Self {
        owner: Some(owner),
        ptr: ptr as *mut (),
        meta: wptr::WStrMetadata::new32(length, is_wide),
        chars_used: Cell::new(0),
        // Dependent strings are never interned
        capacity: Cell::new(wptr::WStrMetadata::new32(0, false)),
    }
}
/// Tries to append `right` to `left` in place, inside the spare capacity of the
/// buffer that `left` lives in.
///
/// On success, `right`'s code units are copied directly behind `left`'s data,
/// the buffer owner's `chars_used` is increased by `right.len()`, and a new
/// dependent representation is returned. It starts at `left`'s pointer, has
/// length `left.len() + right.len()`, and has the buffer owner as its owner.
///
/// Returns `None` (the caller has to allocate a fresh string) when:
/// - `left` and `right` differ in width,
/// - `left` or its owner is not a managed (GC-allocated) string,
/// - the first unused slot of the owner's buffer is not directly behind
///   `left`'s data, or
/// - the spare capacity is smaller than `right.len()`.
///
/// Calls `panic_on_invalid_length` if the new `chars_used` or the new length
/// would be too large.
pub fn try_append_inline(left: AvmString<'gc>, right: &WStr) -> Option<Self> {
// note: we could also in-place append a byte string to a wide string
// But it was skipped for now.
if left.is_wide() != right.is_wide() {
return None;
}
// The string whose buffer we try to write into: `left`'s owner if it has one,
// otherwise `left` itself.
let left_origin_s = left.owner().unwrap_or(left);
// Both must be managed; a static string on either side falls through to `None`.
// From here on, `left` is shadowed by its managed representation.
if let (Some(left), Some(left_origin)) = (left.as_managed(), left_origin_s.as_managed()) {
// Size of one code unit in bytes.
let char_size = if left.is_wide() { 2 } else { 1 };
/*
assumptions:
- left.len <= left.chars_used <= left.capacity
- left_ptr is inside left_origin_ptr .. left_origin_ptr + left.chars_used
note: it's possible that left == left_origin.
*/
// SAFETY: relies on the assumptions listed above; none of them are checked here.
unsafe {
let left_origin_ptr = left_origin.ptr as *const u8;
let left_ptr = left.ptr as *const u8;
/*
Assume a="abc", b=a+"d", c=a.substr(1), we're running d=c+"e"

a          -> abc
b          -> abcd
c          ->  bc        v left_capacity_end
a's memory -> abcd_______
                 ^ first_requested
                  ^ first_available

We can only append in-place if first_requested and first_available match
And we have enough spare capacity.
(In the example above they do not match, so d=c+"e" is not done in-place.)
*/
let first_available =
left_origin_ptr.add(char_size * left_origin.chars_used.get() as usize);
let first_requested = left_ptr.add(char_size * left.len());
// Stays 0 unless the free space starts exactly where `left` ends.
let mut chars_available = 0;
if first_available == first_requested {
let left_capacity_end =
left_origin_ptr.add(char_size * left_origin.capacity.get().len());
chars_available =
((left_capacity_end as usize) - (first_available as usize)) / char_size;
}
// NOTE(review): an empty `right` passes this check even when the pointers above
// did not match (0 >= 0); the result is then a dependent string with the same
// contents as `left`. Confirm that callers never pass an empty `right`.
if chars_available >= right.len() {
let first_available = first_available as *mut u8;
let right_ptr = right as *const WStr as *const () as *const u8;
// Write `right`'s code units into the owner's unused capacity.
std::ptr::copy_nonoverlapping(
right_ptr,
first_available,
char_size * right.len(),
);
let new_chars_used: usize = left_origin.chars_used.get() as usize + right.len();
if new_chars_used >= u32::MAX as usize {
// This isn't really about the string length,
// but it's close enough?
panic_on_invalid_length(new_chars_used);
}
// Mark the freshly written code units as used in the owner's buffer.
left_origin.chars_used.set(new_chars_used as u32);
let new_len = left.len() + right.len();
// NOTE(review): `>=` also rejects a length exactly equal to `WStr::MAX_LEN` —
// confirm whether that is intended.
if new_len >= WStr::MAX_LEN {
panic_on_invalid_length(new_len);
}
// The result starts where `left` starts and additionally covers the appended part.
let ret = Self::new_dependent_raw(
left_origin_s,
left_ptr,
new_len as u32,
left.is_wide(),
);
return Some(ret);
}
}
}
None
}
#[inline]
pub fn is_dependent(&self) -> bool {
self.owner.is_some()

View File

@ -11,7 +11,7 @@ pub struct WStr {
}
#[cold]
pub(super) fn panic_on_invalid_length(len: usize) -> ! {
pub fn panic_on_invalid_length(len: usize) -> ! {
panic!("Too many code units in Ruffle string (len = {})", len)
}

View File

@ -35,7 +35,7 @@ pub use ptr::WStrMetadata;
use alloc::borrow::Cow;
use core::borrow::Borrow;
use common::panic_on_invalid_length;
pub use common::panic_on_invalid_length;
/// Flattens a slice of strings, placing `sep` as a separator between each.
#[inline]