core: Reimplement AVM string methods using our UCS2 API

This generally means that methods are more efficient, as we
don't need to encode to UTF16 on-the-fly to have correct indices.

This also fixes some bugs:
 - charCode now properly handles surrogate pairs
 - calling lastIndexOf with an empty pattern and an out-of-bounds index now
properly returns the string length

Still missing is AVM2's String.match
This commit is contained in:
Moulins 2021-09-15 11:59:43 +02:00 committed by kmeisthax
parent 32fa20b857
commit 7f4af210b8
8 changed files with 264 additions and 262 deletions

View File

@ -295,7 +295,7 @@ pub fn as_set_prop_flags<'gc>(
Some(v) => {
let props = v.coerce_to_string(activation)?;
if props.contains(',') {
for prop_name in props.split(',') {
for prop_name in props.as_str().split(',') {
object.set_attributes(
activation.context.gc_context,
Some(AvmString::new(activation.context.gc_context, prop_name)),

View File

@ -1,5 +1,4 @@
//! `String` class impl
use crate::avm1::activation::Activation;
use crate::avm1::error::Error;
use crate::avm1::function::{Executable, FunctionObject};
@ -7,7 +6,7 @@ use crate::avm1::object::value_object::ValueObject;
use crate::avm1::property::Attribute;
use crate::avm1::property_decl::{define_properties_on, Declaration};
use crate::avm1::{ArrayObject, Object, TObject, Value};
use crate::string::{utils as string_utils, AvmString};
use crate::string::{utils as string_utils, AvmString, WString};
use gc_arena::MutationContext;
const PROTO_DECLS: &[Declaration] = declare_properties! {
@ -43,7 +42,7 @@ pub fn string<'gc>(
};
if let Some(mut vbox) = this.as_value_object() {
let len = value.encode_utf16().count();
let len = value.len();
vbox.define_value(
activation.context.gc_context,
"length",
@ -105,24 +104,21 @@ fn char_at<'gc>(
this: Object<'gc>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error<'gc>> {
// TODO: Will return REPLACEMENT_CHAR if this indexes a character outside the BMP, losing info about the surrogate.
// When we improve our string representation, the unpaired surrogate should be returned.
let this_val = Value::from(this);
let string = this_val.coerce_to_string(activation)?;
let i = args
.get(0)
.unwrap_or(&Value::Undefined)
.coerce_to_i32(activation)?;
let ret = if i >= 0 {
string
.encode_utf16()
.nth(i as usize)
.map(|c| string_utils::utf16_code_unit_to_char(c).to_string())
.unwrap_or_default()
} else {
"".into()
};
Ok(AvmString::new(activation.context.gc_context, ret).into())
let ret = usize::try_from(i)
.ok()
.and_then(|i| string.try_get(i))
.map(WString::from_unit)
.map(|ret| AvmString::new_ucs2(activation.context.gc_context, ret))
.unwrap_or_else(|| "".into());
Ok(ret.into())
}
fn char_code_at<'gc>(
@ -137,10 +133,7 @@ fn char_code_at<'gc>(
.unwrap_or(&Value::Undefined)
.coerce_to_i32(activation)?;
let ret = if i >= 0 {
this.encode_utf16()
.nth(i as usize)
.map(f64::from)
.unwrap_or(f64::NAN)
this.try_get(i as usize).map(f64::from).unwrap_or(f64::NAN)
} else {
f64::NAN
};
@ -152,12 +145,15 @@ fn concat<'gc>(
this: Object<'gc>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error<'gc>> {
let mut ret = Value::from(this).coerce_to_string(activation)?.to_string();
let mut ret: WString = Value::from(this)
.coerce_to_string(activation)?
.as_ucs2()
.into();
for arg in args {
let s = arg.coerce_to_string(activation)?;
ret.push_str(&s)
ret.push_str(s.as_ucs2())
}
Ok(AvmString::new(activation.context.gc_context, ret).into())
Ok(AvmString::new_ucs2(activation.context.gc_context, ret).into())
}
fn from_char_code<'gc>(
@ -165,17 +161,16 @@ fn from_char_code<'gc>(
_this: Object<'gc>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error<'gc>> {
// TODO: Unpaired surrogates will be replace with Unicode replacement char.
let mut out = String::with_capacity(args.len());
let mut out = WString::with_capacity(args.len(), false);
for arg in args {
let i = arg.coerce_to_u16(activation)?;
if i == 0 {
// Stop at a null-terminator.
break;
}
out.push(string_utils::utf16_code_unit_to_char(i));
out.push(i);
}
Ok(AvmString::new(activation.context.gc_context, out).into())
Ok(AvmString::new_ucs2(activation.context.gc_context, out).into())
}
fn index_of<'gc>(
@ -183,46 +178,21 @@ fn index_of<'gc>(
this: Object<'gc>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error<'gc>> {
let this = Value::from(this)
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<u16>>();
let this = Value::from(this).coerce_to_string(activation)?;
let pattern = match args.get(0) {
None => return Ok(Value::Undefined),
Some(s) => s
.clone()
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<_>>(),
};
let start_index = {
let n = args
.get(1)
.unwrap_or(&Value::Undefined)
.coerce_to_i32(activation)?;
if n >= 0 {
n as usize
} else {
0
}
Some(s) => s.clone().coerce_to_string(activation)?,
};
if start_index >= this.len() {
// Out of range
Ok((-1).into())
} else if pattern.is_empty() {
// Empty pattern is found immediately.
Ok((start_index as f64).into())
} else if let Some(mut pos) = this[start_index..]
.windows(pattern.len())
.position(|w| w == &pattern[..])
{
pos += start_index;
Ok((pos as f64).into())
} else {
// Not found
Ok((-1).into())
}
let start_index = match args.get(1) {
None | Some(Value::Undefined) => 0,
Some(n) => n.coerce_to_i32(activation)?.max(0) as usize,
};
this.try_slice(start_index..)
.and_then(|s| s.find(pattern.as_ucs2()))
.map(|i| Ok((i + start_index).into()))
.unwrap_or_else(|| Ok((-1).into())) // Out of range or not found
}
fn last_index_of<'gc>(
@ -230,38 +200,25 @@ fn last_index_of<'gc>(
this: Object<'gc>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error<'gc>> {
let this = Value::from(this)
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<u16>>();
let this = Value::from(this).coerce_to_string(activation)?;
let pattern = match args.get(0) {
None => return Ok(Value::Undefined),
Some(s) => s
.clone()
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<_>>(),
};
let start_index = match args.get(1) {
None | Some(Value::Undefined) => this.len(),
Some(n) => n.coerce_to_i32(activation)?.max(0) as usize,
Some(s) => s.clone().coerce_to_string(activation)?,
};
if pattern.is_empty() {
// Empty pattern is found immediately.
Ok(start_index.into())
} else if let Some((i, _)) = this[..]
.windows(pattern.len())
.enumerate()
.take(start_index + 1)
.rev()
.find(|(_, w)| *w == &pattern[..])
{
Ok(i.into())
} else {
// Not found
Ok((-1).into())
}
let start_index = match args.get(1) {
None | Some(Value::Undefined) => this.len(),
Some(n) => match usize::try_from(n.coerce_to_i32(activation)?) {
Ok(n) => n + pattern.len(),
Err(_) => return Ok((-1).into()), // Bail out on negative indices.
},
};
this.try_slice(..start_index)
.unwrap_or_else(|| this.as_ucs2())
.rfind(pattern.as_ucs2())
.map(|i| Ok(i.into()))
.unwrap_or_else(|| Ok((-1).into())) // Not found
}
fn slice<'gc>(
@ -276,24 +233,19 @@ fn slice<'gc>(
let this_val = Value::from(this);
let this = this_val.coerce_to_string(activation)?;
let this_len = this.encode_utf16().count();
let start_index = string_wrapping_index(
args.get(0)
.unwrap_or(&Value::Undefined)
.coerce_to_i32(activation)?,
this_len,
this.len(),
);
let end_index = match args.get(1) {
None | Some(Value::Undefined) => this_len,
Some(n) => string_wrapping_index(n.coerce_to_i32(activation)?, this_len),
None | Some(Value::Undefined) => this.len(),
Some(n) => string_wrapping_index(n.coerce_to_i32(activation)?, this.len()),
};
if start_index < end_index {
let ret = string_utils::utf16_iter_to_string(
this.encode_utf16()
.skip(start_index)
.take(end_index - start_index),
);
Ok(AvmString::new(activation.context.gc_context, ret).into())
let ret = WString::from(this.slice(start_index..end_index));
Ok(AvmString::new_ucs2(activation.context.gc_context, ret).into())
} else {
Ok("".into())
}
@ -314,22 +266,22 @@ fn split<'gc>(
limit => limit.coerce_to_i32(activation)?.max(0) as usize,
};
if delimiter.is_empty() {
// When using an empty delimiter, Rust's str::split adds an extra beginning and trailing item, but Flash does not.
// When using an empty delimiter, Str::split adds an extra beginning and trailing item, but Flash does not.
// e.g., split("foo", "") returns ["", "f", "o", "o", ""] in Rust but ["f", "o", "o"] in Flash.
// Special case this to match Flash's behavior.
Ok(ArrayObject::new(
activation.context.gc_context,
activation.context.avm1.prototypes().array,
this.chars()
.take(limit)
.map(|c| AvmString::new(activation.context.gc_context, c.to_string()).into()),
this.iter().take(limit).map(|c| {
AvmString::new_ucs2(activation.context.gc_context, WString::from_unit(c)).into()
}),
)
.into())
} else {
Ok(ArrayObject::new(
activation.context.gc_context,
activation.context.avm1.prototypes().array,
this.split(delimiter.as_ref())
this.split(delimiter.as_ucs2())
.take(limit)
.map(|c| AvmString::new(activation.context.gc_context, c.to_string()).into()),
)
@ -348,23 +300,25 @@ fn substr<'gc>(
let this_val = Value::from(this);
let this = this_val.coerce_to_string(activation)?;
let this_len = this.encode_utf16().count();
let start_index = string_wrapping_index(
args.get(0)
.unwrap_or(&Value::Undefined)
.coerce_to_i32(activation)?,
this_len,
this.len(),
);
let len = match args.get(1) {
None | Some(Value::Undefined) => this_len as i32,
None | Some(Value::Undefined) => this.len() as i32,
Some(n) => n.coerce_to_i32(activation)?,
};
let end_index = string_wrapping_index((start_index as i32) + len, this_len);
let len = end_index.saturating_sub(start_index);
let end_index = string_wrapping_index((start_index as i32) + len, this.len());
let ret = string_utils::utf16_iter_to_string(this.encode_utf16().skip(start_index).take(len));
Ok(AvmString::new(activation.context.gc_context, ret).into())
if start_index < end_index {
let ret = WString::from(this.slice(start_index..end_index));
Ok(AvmString::new_ucs2(activation.context.gc_context, ret).into())
} else {
Ok("".into())
}
}
fn substring<'gc>(
@ -378,24 +332,19 @@ fn substring<'gc>(
let this_val = Value::from(this);
let this = this_val.coerce_to_string(activation)?;
let this_len = this.encode_utf16().count();
let mut start_index = string_index(args.get(0).unwrap().coerce_to_i32(activation)?, this_len);
let mut start_index = string_index(args.get(0).unwrap().coerce_to_i32(activation)?, this.len());
let mut end_index = match args.get(1) {
None | Some(Value::Undefined) => this_len,
Some(n) => string_index(n.coerce_to_i32(activation)?, this_len),
None | Some(Value::Undefined) => this.len(),
Some(n) => string_index(n.coerce_to_i32(activation)?, this.len()),
};
// substring automatically swaps the start/end if they are flipped.
if end_index < start_index {
std::mem::swap(&mut end_index, &mut start_index);
}
let ret = string_utils::utf16_iter_to_string(
this.encode_utf16()
.skip(start_index)
.take(end_index - start_index),
);
Ok(AvmString::new(activation.context.gc_context, ret).into())
let ret = WString::from(this.slice(start_index..end_index));
Ok(AvmString::new_ucs2(activation.context.gc_context, ret).into())
}
fn to_lower_case<'gc>(
@ -405,11 +354,11 @@ fn to_lower_case<'gc>(
) -> Result<Value<'gc>, Error<'gc>> {
let this_val = Value::from(this);
let this = this_val.coerce_to_string(activation)?;
Ok(AvmString::new(
Ok(AvmString::new_ucs2(
activation.context.gc_context,
this.chars()
.map(string_utils::swf_char_to_lowercase)
.collect::<String>(),
this.iter()
.map(string_utils::swf_to_lowercase)
.collect::<WString>(),
)
.into())
}
@ -439,11 +388,11 @@ fn to_upper_case<'gc>(
) -> Result<Value<'gc>, Error<'gc>> {
let this_val = Value::from(this);
let this = this_val.coerce_to_string(activation)?;
Ok(AvmString::new(
Ok(AvmString::new_ucs2(
activation.context.gc_context,
this.chars()
.map(string_utils::swf_char_to_uppercase)
.collect::<String>(),
this.iter()
.map(string_utils::swf_to_uppercase)
.collect::<WString>(),
)
.into())
}

View File

@ -8,8 +8,7 @@ use crate::avm2::object::{primitive_allocator, Object, TObject};
use crate::avm2::value::Value;
use crate::avm2::Error;
use crate::avm2::{ArrayObject, ArrayStorage};
use crate::string::utils as string_utils;
use crate::string::AvmString;
use crate::string::{AvmString, WString};
use gc_arena::{GcCell, MutationContext};
use std::iter;
@ -53,7 +52,7 @@ fn length<'gc>(
) -> Result<Value<'gc>, Error> {
if let Some(this) = this {
if let Value::String(s) = this.value_of(activation.context.gc_context)? {
return Ok(s.encode_utf16().count().into());
return Ok(s.len().into());
}
}
@ -79,11 +78,11 @@ fn char_at<'gc>(
let index = if !n.is_nan() { n as usize } else { 0 };
let ret = s
.encode_utf16()
.nth(index)
.map(|c| string_utils::utf16_code_unit_to_char(c).to_string())
.try_get(index)
.map(WString::from_unit)
.map(|s| AvmString::new_ucs2(activation.context.gc_context, s))
.unwrap_or_default();
return Ok(AvmString::new(activation.context.gc_context, ret).into());
return Ok(ret.into());
}
}
@ -108,11 +107,7 @@ fn char_code_at<'gc>(
}
let index = if !n.is_nan() { n as usize } else { 0 };
let ret = s
.encode_utf16()
.nth(index)
.map(f64::from)
.unwrap_or(f64::NAN);
let ret = s.try_get(index).map(f64::from).unwrap_or(f64::NAN);
return Ok(ret.into());
}
}
@ -144,15 +139,16 @@ fn from_char_code<'gc>(
_this: Option<Object<'gc>>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error> {
let mut out = String::with_capacity(args.len());
let mut out = WString::with_capacity(args.len(), false);
for arg in args {
let i = arg.coerce_to_u32(activation)? as u16;
if i == 0 {
// Ignore nulls.
continue;
}
out.push(string_utils::utf16_code_unit_to_char(i));
out.push(i);
}
Ok(AvmString::new(activation.context.gc_context, out).into())
Ok(AvmString::new_ucs2(activation.context.gc_context, out).into())
}
/// Implements `String.indexOf`
@ -162,46 +158,22 @@ fn index_of<'gc>(
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error> {
if let Some(this) = this {
let this = Value::from(this)
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<u16>>();
let this = Value::from(this).coerce_to_string(activation)?;
let pattern = match args.get(0) {
None => return Ok(Value::Undefined),
Some(s) => s
.clone()
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<_>>(),
};
let start_index = {
let n = args
.get(1)
.unwrap_or(&Value::Undefined)
.coerce_to_i32(activation)?;
if n >= 0 {
n as usize
} else {
0
}
Some(s) => s.clone().coerce_to_string(activation)?,
};
return if start_index >= this.len() {
// Out of range
Ok((-1).into())
} else if pattern.is_empty() {
// Empty pattern is found immediately.
Ok((start_index as f64).into())
} else if let Some(mut pos) = this[start_index..]
.windows(pattern.len())
.position(|w| w == &pattern[..])
{
pos += start_index;
Ok((pos as f64).into())
} else {
// Not found
Ok((-1).into())
let start_index = match args.get(1) {
None | Some(Value::Undefined) => 0,
Some(n) => n.coerce_to_i32(activation)?.max(0) as usize,
};
return this
.try_slice(start_index..)
.and_then(|s| s.find(pattern.as_ucs2()))
.map(|i| Ok((i + start_index).into()))
.unwrap_or_else(|| Ok((-1).into())); // Out of range or not found
}
Ok(Value::Undefined)
@ -214,38 +186,26 @@ fn last_index_of<'gc>(
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error> {
if let Some(this) = this {
let this = Value::from(this)
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<u16>>();
let this = Value::from(this).coerce_to_string(activation)?;
let pattern = match args.get(0) {
None => return Ok(Value::Undefined),
Some(s) => s
.clone()
.coerce_to_string(activation)?
.encode_utf16()
.collect::<Vec<_>>(),
};
let start_index = match args.get(1) {
None | Some(Value::Undefined) => this.len(),
Some(n) => n.coerce_to_i32(activation)?.max(0) as usize,
Some(s) => s.clone().coerce_to_string(activation)?,
};
return if pattern.is_empty() {
// Empty pattern is found immediately.
Ok(start_index.into())
} else if let Some((i, _)) = this[..]
.windows(pattern.len())
.enumerate()
.take(start_index + 1)
.rev()
.find(|(_, w)| *w == &pattern[..])
{
Ok(i.into())
} else {
// Not found
Ok((-1).into())
let start_index = match args.get(1) {
None | Some(Value::Undefined) => this.len(),
Some(n) => match usize::try_from(n.coerce_to_i32(activation)?) {
Ok(n) => n + pattern.len(),
Err(_) => return Ok((-1).into()), // Bail out on negative indices.
},
};
return this
.try_slice(..start_index)
.unwrap_or_else(|| this.as_ucs2())
.rfind(pattern.as_ucs2())
.map(|i| Ok(i.into()))
.unwrap_or_else(|| Ok((-1).into())); // Not found
}
Ok(Value::Undefined)
@ -274,14 +234,14 @@ fn match_s<'gc>(
let mut last = regexp.last_index();
let old_last_index = regexp.last_index();
regexp.set_last_index(0);
while let Some(result) = regexp.exec(&this) {
while let Some(result) = regexp.exec(this.as_str()) {
if regexp.last_index() == last {
break;
}
storage.push(
AvmString::new(
activation.context.gc_context,
this[result.range()].to_string(),
this.as_str()[result.range()].to_string(),
)
.into(),
);
@ -297,10 +257,10 @@ fn match_s<'gc>(
} else {
let old = regexp.last_index();
regexp.set_last_index(0);
if let Some(result) = regexp.exec(&this) {
if let Some(result) = regexp.exec(this.as_str()) {
let substrings = result
.groups()
.map(|range| this[range.unwrap_or(0..0)].to_string());
.map(|range| this.as_str()[range.unwrap_or(0..0)].to_string());
let mut storage = ArrayStorage::new(0);
for substring in substrings {
@ -332,28 +292,23 @@ fn slice<'gc>(
) -> Result<Value<'gc>, Error> {
if let Some(this) = this {
let this = Value::from(this).coerce_to_string(activation)?;
let this_len = this.encode_utf16().count();
let start_index = match args.get(0) {
None => 0,
Some(n) => {
let n = n.coerce_to_number(activation)?;
string_wrapping_index(n, this_len)
string_wrapping_index(n, this.len())
}
};
let end_index = match args.get(1) {
None => this_len,
None => this.len(),
Some(n) => {
let n = n.coerce_to_number(activation)?;
string_wrapping_index(n, this_len)
string_wrapping_index(n, this.len())
}
};
return if start_index < end_index {
let ret = string_utils::utf16_iter_to_string(
this.encode_utf16()
.skip(start_index)
.take(end_index - start_index),
);
Ok(AvmString::new(activation.context.gc_context, ret).into())
let ret = WString::from(this.slice(start_index..end_index));
Ok(AvmString::new_ucs2(activation.context.gc_context, ret).into())
} else {
Ok("".into())
};
@ -390,30 +345,30 @@ fn split<'gc>(
Value::Undefined => usize::MAX,
limit => limit.coerce_to_i32(activation)?.max(0) as usize,
};
if delimiter.is_empty() {
// When using an empty delimiter, Rust's str::split adds an extra beginning and trailing item, but Flash does not.
let storage = if delimiter.is_empty() {
// When using an empty delimiter, Str::split adds an extra beginning and trailing item, but Flash does not.
// e.g., split("foo", "") returns ["", "f", "o", "o", ""] in Rust but ["f", "o", "o"] in Flash.
// Special case this to match Flash's behavior.
return Ok(ArrayObject::from_storage(
activation,
this.chars()
.take(limit)
.map(|c| AvmString::new(activation.context.gc_context, c.to_string()))
.collect(),
)
.unwrap()
.into());
this.iter()
.take(limit)
.map(|c| {
Value::from(AvmString::new_ucs2(
activation.context.gc_context,
WString::from_unit(c),
))
})
.collect()
} else {
return Ok(ArrayObject::from_storage(
activation,
this.split(delimiter.as_ref())
.take(limit)
.map(|c| AvmString::new(activation.context.gc_context, c.to_string()))
.collect(),
)
this.split(delimiter.as_ucs2())
.take(limit)
.map(|c| Value::from(AvmString::new_ucs2(activation.context.gc_context, c.into())))
.collect()
};
return Ok(ArrayObject::from_storage(activation, storage)
.unwrap()
.into());
}
}
Ok(Value::Undefined)
}
@ -432,13 +387,11 @@ fn substr<'gc>(
return Ok(Value::from(this));
}
let this_len = this.encode_utf16().count();
let start_index = string_wrapping_index(
args.get(0)
.unwrap_or(&Value::Number(0.))
.coerce_to_number(activation)?,
this_len,
this.len(),
);
let len = args
@ -446,15 +399,14 @@ fn substr<'gc>(
.unwrap_or(&Value::Number(0x7fffffff as f64))
.coerce_to_number(activation)?;
let len = if len == f64::INFINITY {
this_len
let end_index = if len == f64::INFINITY {
this.len()
} else {
len as usize
this.len().min(start_index + len as usize)
};
let ret =
string_utils::utf16_iter_to_string(this.encode_utf16().skip(start_index).take(len));
return Ok(AvmString::new(activation.context.gc_context, ret).into());
let ret = WString::from(this.slice(start_index..end_index));
return Ok(AvmString::new_ucs2(activation.context.gc_context, ret).into());
}
Ok(Value::Undefined)
@ -474,32 +426,26 @@ fn substring<'gc>(
return Ok(Value::from(this));
}
let this_len = this.encode_utf16().count();
let mut start_index = string_index(
args.get(0)
.unwrap_or(&Value::Number(0.))
.coerce_to_number(activation)?,
this_len,
this.len(),
);
let mut end_index = string_index(
args.get(1)
.unwrap_or(&Value::Number(0x7fffffff as f64))
.coerce_to_number(activation)?,
this_len,
this.len(),
);
if end_index < start_index {
std::mem::swap(&mut end_index, &mut start_index);
}
let ret = string_utils::utf16_iter_to_string(
this.encode_utf16()
.skip(start_index)
.take(end_index - start_index),
);
return Ok(AvmString::new(activation.context.gc_context, ret).into());
let ret = WString::from(this.slice(start_index..end_index));
return Ok(AvmString::new_ucs2(activation.context.gc_context, ret).into());
}
Ok(Value::Undefined)

View File

@ -23,7 +23,7 @@ pub const MAX_STRING_LEN: usize = raw::MAX_STRING_LEN;
pub use avm::AvmString;
pub use buf::WString;
pub use common::{BorrowWStr, BorrowWStrMut, Units};
pub use ops::Iter;
pub use ops::{Iter, Split};
pub use slice::{WStr, WStrMut};
use common::panic_on_invalid_length;

View File

@ -10,7 +10,7 @@ enum Source<'gc> {
// both `impl Deref<&str>` and O(1) UCS2 char access.
// TODO(moulins): remove the extra `String`
Owned(Gc<'gc, (String, WString)>),
// Should be an ASCII string, for zero-copy conversion into `Str<'_>`.
// Should be an ASCII string, for zero-copy conversion into `WStr<'_>`.
Static(&'static str),
}
@ -37,6 +37,7 @@ impl<'gc> AvmString<'gc> {
}
}
/// Returns this string as a `&str`.
///
/// The body relies on deref coercion from `&Self` to `&str`.
#[inline]
pub fn as_str(&self) -> &str {
self
}
@ -48,6 +49,12 @@ impl<'gc> AvmString<'gc> {
Source::Static(str) => WStr::from_units(str.as_bytes()),
}
}
// Generate the shared string methods (e.g. `iter`, `find`, `rfind`, `split`)
// on `AvmString`; each one forwards to the view returned by `as_ucs2()`.
impl_str_methods! {
lifetime: '_;
self: &Self;
deref: self.as_ucs2();
}
}
impl Default for AvmString<'_> {

View File

@ -137,6 +137,27 @@ macro_rules! impl_str_methods {
pub fn iter($self: $receiver) -> crate::string::ops::Iter<$lt> {
crate::string::ops::str_iter($deref)
}
/// Analogue of [`str::find`].
///
/// Returns the index of the first occurrence of `needle`, or `None` if
/// `needle` does not occur in this string.
// TODO: add our own Pattern trait to support several kinds of needles?
#[inline]
pub fn find($self: $receiver, needle: WStr<'_>) -> Option<usize> {
crate::string::ops::str_find($deref, needle)
}
/// Analogue of [`str::rfind`].
///
/// Returns the index of the last occurrence of `needle`, or `None` if
/// `needle` does not occur in this string.
// TODO: add our own Pattern trait to support several kinds of needles?
#[inline]
pub fn rfind($self: $receiver, needle: WStr<'_>) -> Option<usize> {
crate::string::ops::str_rfind($deref, needle)
}
/// Analogue of [`str::split`].
///
/// Returns an iterator over the pieces of this string that are delimited by
/// `separator`; the separator itself is not included in the pieces.
// TODO: add our own Pattern trait to support several kinds of needles?
#[inline]
pub fn split<'s>($self: $receiver, separator: WStr<'s>) -> crate::string::ops::Split<$lt, 's> {
crate::string::ops::str_split($deref, separator)
}
}
}

View File

@ -90,3 +90,55 @@ pub fn str_hash<H: Hasher>(s: WStr<'_>, state: &mut H) {
Units::Wide(us) => us.iter().for_each(|u| state.write_u16(*u)),
}
}
/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// Yields `None` when `needle` is longer than `haystack` or never occurs.
/// An empty `needle` matches at index 0.
pub fn str_find(haystack: WStr<'_>, needle: WStr<'_>) -> Option<usize> {
    let needle_len = needle.len();
    // Last index at which a window of `needle_len` units still fits; bail out
    // early when the needle is longer than the haystack.
    let last_start = haystack.len().checked_sub(needle_len)?;
    (0..=last_start).find(|&start| haystack.slice(start..start + needle_len) == needle)
}
/// Returns the index of the last occurrence of `needle` in `haystack`.
///
/// Yields `None` when `needle` is longer than `haystack` or never occurs.
/// An empty `needle` matches at `haystack.len()`.
pub fn str_rfind(haystack: WStr<'_>, needle: WStr<'_>) -> Option<usize> {
    let needle_len = needle.len();
    // Last index at which a window of `needle_len` units still fits; bail out
    // early when the needle is longer than the haystack.
    let last_start = haystack.len().checked_sub(needle_len)?;
    // Scan candidate start positions from the back.
    (0..=last_start).rfind(|&start| haystack.slice(start..start + needle_len) == needle)
}
/// Creates an iterator over the pieces of `string` delimited by `separator`.
///
/// Analogue of [`str::split`], operating on code units. With an empty
/// `separator` the iterator yields an empty piece, then every code unit of
/// `string` as its own piece, then a final empty piece (mirroring what
/// `str::split("")` does for characters).
#[inline]
pub fn str_split<'a, 'b>(string: WStr<'a>, separator: WStr<'b>) -> Split<'a, 'b> {
    Split {
        string,
        separator,
        started: false,
        done: false,
    }
}

/// Iterator over the pieces of a string, created by [`str_split`].
pub struct Split<'a, 'b> {
    /// The part of the input that has not been yielded yet.
    string: WStr<'a>,
    /// The separator the input is split on.
    separator: WStr<'b>,
    /// Whether `next` has already been called; only consulted for empty separators.
    started: bool,
    /// Set once the final piece has been yielded.
    done: bool,
}

impl<'a, 'b> Iterator for Split<'a, 'b> {
    type Item = WStr<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }

        let first_call = !self.started;
        self.started = true;

        let separator_len = self.separator.len();
        let remaining_len = self.string.len();

        // Position of the next separator match, relative to the remaining input.
        let hit = if separator_len > 0 {
            self.string.find(self.separator)
        } else if first_call {
            // An empty separator matches once at the very start...
            Some(0)
        } else if remaining_len > 0 {
            // ...and then after every code unit. Searching from index 0 again
            // would match forever without consuming any input.
            Some(1)
        } else {
            None
        };

        match hit {
            Some(i) => {
                let prefix = self.string.slice(..i);
                self.string = self.string.slice((i + separator_len)..);
                Some(prefix)
            }
            None => {
                // No separator left: the remainder is the last piece.
                self.done = true;
                Some(self.string)
            }
        }
    }
}

View File

@ -42,6 +42,19 @@ pub fn utf16_code_unit_to_char(c: u16) -> char {
.unwrap_or(char::REPLACEMENT_CHARACTER)
}
/// Maps a UCS2 code unit to its lowercase variant according to the Flash Player.
/// Note that this mapping is different than Rust's `to_lowercase`.
///
/// ASCII units use the ASCII lowercase mapping; other units are looked up in
/// `LOWERCASE_TABLE` and returned unchanged when they have no entry.
pub fn swf_to_lowercase(c: u16) -> u16 {
    if c >= 0x80 {
        // Non-ASCII: binary-search the table on its first tuple field.
        return LOWERCASE_TABLE
            .binary_search_by_key(&c, |&(unit, _)| unit)
            .map_or(c, |idx| LOWERCASE_TABLE[idx].1);
    }

    // `c` is below 0x80 here, so the narrowing cast is lossless.
    u16::from((c as u8).to_ascii_lowercase())
}
/// Maps a char to its lowercase variant according to the Flash Player.
/// Note that this mapping is different that Rust's `to_lowercase`.
pub fn swf_char_to_lowercase(c: char) -> char {
@ -60,12 +73,26 @@ pub fn swf_char_to_lowercase(c: char) -> char {
}
}
/// Maps a UCS2 code unit to its uppercase variant according to the Flash Player.
/// Note that this mapping is different than Rust's `to_uppercase`.
///
/// ASCII units use the ASCII uppercase mapping; other units are looked up in
/// `UPPERCASE_TABLE` and returned unchanged when they have no entry.
pub fn swf_to_uppercase(c: u16) -> u16 {
    if c >= 0x80 {
        // Non-ASCII: binary-search the table on its first tuple field.
        return UPPERCASE_TABLE
            .binary_search_by_key(&c, |&(unit, _)| unit)
            .map_or(c, |idx| UPPERCASE_TABLE[idx].1);
    }

    // `c` is below 0x80 here, so the narrowing cast is lossless.
    u16::from((c as u8).to_ascii_uppercase())
}
/// Maps a char to its uppercase variant according to the Flash Player.
/// Note that this mapping is different that Rust's `to_uppercase`.
pub fn swf_char_to_uppercase(c: char) -> char {
if c.is_ascii() {
return c.to_ascii_uppercase();
}
let code_pt: u32 = c.into();
if code_pt <= u16::MAX.into() {
let code_pt = code_pt as u16;