avm2: implement string.split for regex (#7363)
* avm2: implement string.split for regex
* Compressed the testing for regexp and unwrapping thereof
* Moved the split logic into the regex object
* Factored out a method for utf-16 matching
* Added tests
* formatting
* replaced manual counting with storage.length()
* clippy cleanup
* Address review comments
* fix import path for WString
* remove redundant variable in return statement
* error passing via '?' instead of unwrap()
This commit is contained in:
parent
ded77ab46a
commit
80d1a8449a
|
@ -408,20 +408,23 @@ fn split<'gc>(
|
|||
.into(),
|
||||
);
|
||||
}
|
||||
if delimiter
|
||||
.as_object()
|
||||
.map(|o| o.as_regexp().is_some())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
log::warn!("string.split(regex) - not implemented");
|
||||
}
|
||||
|
||||
let this = Value::from(this).coerce_to_string(activation)?;
|
||||
let delimiter = delimiter.coerce_to_string(activation)?;
|
||||
let limit = match args.get(1).unwrap_or(&Value::Undefined) {
|
||||
Value::Undefined => usize::MAX,
|
||||
limit => limit.coerce_to_i32(activation)?.max(0) as usize,
|
||||
};
|
||||
|
||||
if let Some(mut regexp) = delimiter
|
||||
.as_object()
|
||||
.as_ref()
|
||||
.and_then(|o| o.as_regexp_mut(activation.context.gc_context))
|
||||
{
|
||||
return Ok(regexp.split(activation, this, limit)?.into());
|
||||
}
|
||||
|
||||
let delimiter = delimiter.coerce_to_string(activation)?;
|
||||
|
||||
let storage = if delimiter.is_empty() {
|
||||
// When using an empty delimiter, Str::split adds an extra beginning and trailing item, but Flash does not.
|
||||
// e.g., split("foo", "") returns ["", "f", "o", "o", ""] in Rust but ["f", "o", "o"] in Flash.
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::avm2::activation::Activation;
|
||||
use crate::avm2::Error;
|
||||
use crate::avm2::{ArrayObject, ArrayStorage, Object};
|
||||
use crate::string::WString;
|
||||
use crate::string::{AvmString, Units, WStrToUtf8};
|
||||
use bitflags::bitflags;
|
||||
use gc_arena::Collect;
|
||||
|
@ -144,10 +148,55 @@ impl<'gc> RegExp<'gc> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn exec(&mut self, text: AvmString<'gc>) -> Option<regress::Match> {
|
||||
let global = self.flags.contains(RegExpFlags::GLOBAL);
|
||||
let start = if global { self.last_index } else { 0 };
|
||||
let re_match = self.find_utf8_match_at(text, start, |text, mut re_match| {
|
||||
pub fn split(
|
||||
&mut self,
|
||||
activation: &mut Activation<'_, 'gc, '_>,
|
||||
text: AvmString<'gc>,
|
||||
limit: usize,
|
||||
) -> Result<Object<'gc>, Error> {
|
||||
let mut storage = ArrayStorage::new(0);
|
||||
// The empty regex is a special case which splits into characters.
|
||||
if self.source.is_empty() {
|
||||
let mut it = text.chars().take(limit);
|
||||
while let Some(Ok(c)) = it.next() {
|
||||
storage.push(
|
||||
AvmString::new(activation.context.gc_context, WString::from_char(c)).into(),
|
||||
);
|
||||
}
|
||||
return ArrayObject::from_storage(activation, storage);
|
||||
}
|
||||
|
||||
let mut start = 0;
|
||||
while let Some(m) = self.find_utf16_match(text, start) {
|
||||
if m.range.end == start {
|
||||
break;
|
||||
}
|
||||
storage.push(
|
||||
AvmString::new(activation.context.gc_context, &text[start..m.range.start]).into(),
|
||||
);
|
||||
if storage.length() >= limit {
|
||||
break;
|
||||
}
|
||||
for c in m.captures.iter().filter_map(Option::as_ref) {
|
||||
storage.push(
|
||||
AvmString::new(activation.context.gc_context, &text[c.start..c.end]).into(),
|
||||
);
|
||||
if storage.length() >= limit {
|
||||
break; // Intentional bug to match Flash.
|
||||
// Causes adding parts past limit.
|
||||
}
|
||||
}
|
||||
|
||||
start = m.range.end;
|
||||
}
|
||||
if storage.length() < limit {
|
||||
storage.push(AvmString::new(activation.context.gc_context, &text[start..]).into());
|
||||
}
|
||||
ArrayObject::from_storage(activation, storage)
|
||||
}
|
||||
|
||||
fn find_utf16_match(&mut self, text: AvmString<'gc>, start: usize) -> Option<regress::Match> {
|
||||
self.find_utf8_match_at(text, start, |text, mut re_match| {
|
||||
// Sort the capture endpoints by increasing index, so that CachedText::utf16_index is efficient.
|
||||
let mut utf8_indices = re_match
|
||||
.captures
|
||||
|
@ -162,10 +211,13 @@ impl<'gc> RegExp<'gc> {
|
|||
for i in utf8_indices {
|
||||
*i = text.utf16_index(*i).unwrap();
|
||||
}
|
||||
|
||||
re_match
|
||||
})?;
|
||||
|
||||
})
|
||||
}
|
||||
pub fn exec(&mut self, text: AvmString<'gc>) -> Option<regress::Match> {
|
||||
let global = self.flags.contains(RegExpFlags::GLOBAL);
|
||||
let start = if global { self.last_index } else { 0 };
|
||||
let re_match = self.find_utf16_match(text, start)?;
|
||||
if global {
|
||||
self.last_index = re_match.end();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
package {
    import flash.display.MovieClip;

    /**
     * Test fixture for String.split with string delimiters and RegExp
     * delimiters. Each case traces a "// ..." header line followed by the
     * result of the split call; the traced lines are compared against the
     * expected output recorded next to this file.
     */
    public class Test extends MovieClip {
        public function Test() {

            // note: compiled manually with AIR SDK

            trace('// var text = "a.b.c";');
            var text = "a.b.c";

            trace('// text.split("a.b.c")');
            trace(text.split("a.b.c"));
            trace('// text.split(".")');
            trace(text.split("."));
            trace('// text.split("")');
            trace(text.split(""));

            trace('// text.split()');
            trace(text.split());

            // A single declaration is reused for both patterns below; declaring
            // the same variable twice in one function triggers a duplicate
            // definition warning.
            var regex:RegExp;

            trace('// text.split(regex)');
            regex = /b+/;
            trace("abbabc".split(regex));

            trace('// no match');
            trace("ccccc".split(/b/));

            trace('// match all');
            regex = /.*/;
            trace("cccc".split(regex));

            trace('// empty string, match all');
            trace("".split(/.*/));

            trace('// multibyte chars');
            trace("ąąbąą".split(/b/));

            trace('// Group expansion');
            trace("abba".split(/(b(b))/));

            trace('// Split on empty regex');
            trace("aął".split(/(?:)/));

            trace('// Split on non-empty regex with zero-length match');
            trace("aąbcde".split(/f*/));

            trace('// Limit');
            trace("aąbaababa".split(/b/, 3));

            trace('// Limit on group captures - flash returns 6 parts instead of 5');
            trace("aąbbaabbabbabbabbabba".split(/(b(b))/, 5));

        }
    }
}
|
||||
|
|
@ -7,4 +7,23 @@ a,b,c
|
|||
a,.,b,.,c
|
||||
// text.split()
|
||||
a.b.c
|
||||
// text.split(regex) - unimplemented
|
||||
// text.split(regex)
|
||||
a,a,c
|
||||
// no match
|
||||
ccccc
|
||||
// match all
|
||||
,
|
||||
// empty string, match all
|
||||
|
||||
// multibyte chars
|
||||
ąą,ąą
|
||||
// Group expansion
|
||||
a,bb,b,a
|
||||
// Split on empty regex
|
||||
aął
|
||||
// Split on non-empty regex with zero-length match
|
||||
aąbcde
|
||||
// Limit
|
||||
aą,aa,a
|
||||
// Limit on group captures - flash returns 6 parts instead of 5
|
||||
aą,bb,b,aa,bb,a
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
package {
|
||||
import flash.display.MovieClip;
|
||||
|
||||
public class Test extends MovieClip {
|
||||
public function Test() {
|
||||
|
||||
// note: compiled manually with AIR SDK
|
||||
|
||||
trace('// var text = "a.b.c";');
|
||||
var text = "a.b.c";
|
||||
|
||||
trace('// text.split("a.b.c")');
|
||||
trace(text.split("a.b.c"));
|
||||
trace('// text.split(".")');
|
||||
trace(text.split("."));
|
||||
trace('// text.split("")');
|
||||
trace(text.split(""));
|
||||
|
||||
trace('// text.split()');
|
||||
trace(text.split());
|
||||
trace('// text.split(regex) - unimplemented');
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Binary file not shown.
Loading…
Reference in New Issue