avm1: Correct `parseFloat()`

Rewrite the implementation of `string_to_f64()` to match Flash behavior.
This affects `parseFloat()` as well as any `Value` to `f64` coercion.
This commit is contained in:
relrelb 2022-06-25 16:47:51 +03:00 committed by Mike Welsh
parent 0148fde852
commit f0db6d8c60
2 changed files with 187 additions and 126 deletions

View File

@ -254,76 +254,17 @@ pub fn get_nan<'gc>(
}
}
/// Leniently parses the numeric prefix of `s` as an `f64`.
///
/// The input is filtered, code unit by code unit, into a string that Rust's own
/// float parser accepts; scanning stops at the first unit that cannot extend the
/// number, so trailing garbage is ignored.
///
/// `allow_multiple_dots` decides what a second `.` (before any exponent) does:
/// * `true`: the extra dot is dropped and scanning continues, but any later `.`
///   or exponent marker ends the scan.
/// * `false`: scanning ends at the extra dot.
///
/// Returns `NaN` when the collected prefix is not a valid float.
pub fn parse_float_impl(s: &WStr, allow_multiple_dots: bool) -> f64 {
    // TODO: This is a stop-gap that leans on Rust's float parser. Flash is far
    // more forgiving, hence the filtering pass below.
    let mut accepted = String::with_capacity(s.len());
    let mut dot_seen = false;
    let mut exp_allowed = true;
    let mut sign_allowed = true;

    for unit in s.iter() {
        // Units that do not fit in a byte can never be part of a number.
        let byte = match u8::try_from(unit) {
            Ok(byte) => byte,
            Err(_) => break,
        };

        if byte.is_ascii_digit() {
            sign_allowed = false;
        } else if byte == b'+' || byte == b'-' {
            // A sign is only valid as the very first unit or right after `e`/`E`.
            if !sign_allowed {
                break;
            }
            sign_allowed = false;
        } else if byte == b'.' {
            if !exp_allowed {
                break;
            }
            sign_allowed = false;
            if dot_seen {
                if !allow_multiple_dots {
                    break;
                }
                // AVM1 tolerates a repeated dot (except after `e`): skip it.
                exp_allowed = false;
                continue;
            }
            dot_seen = true;
        } else if byte == b'e' || byte == b'E' {
            if !exp_allowed {
                break;
            }
            sign_allowed = true;
            exp_allowed = false;
            dot_seen = true;
        } else {
            // Anything else is trailing garbage.
            break;
        }

        accepted.push(char::from(byte));
    }

    accepted.parse::<f64>().unwrap_or(f64::NAN)
}
/// Native implementation of the AVM1 global `parseFloat()`.
///
/// Coerces the first argument to a string (propagating any error raised by
/// `coerce_to_string`) and passes it to `parse_float_impl`.
///
/// NOTE(review): this span appears to show both the removed and the added lines
/// of the change, so two variants of the body are interleaved below — confirm
/// against the actual file before relying on the statement order.
pub fn parse_float<'gc>(
activation: &mut Activation<'_, 'gc, '_>,
_this: Object<'gc>,
args: &[Value<'gc>],
) -> Result<Value<'gc>, Error<'gc>> {
// Variant A: bind the coerced string, or return `NaN` when no argument was passed.
let s = if let Some(val) = args.get(0) {
val.coerce_to_string(activation)?
// Variant B: parse in non-strict mode (`strict == false`) straight from the argument.
if let Some(value) = args.get(0) {
let string = value.coerce_to_string(activation)?;
Ok(crate::avm1::value::parse_float_impl(&string, false).into())
} else {
return Ok(f64::NAN.into());
};
// Variant A: trim leading whitespace, then parse with multiple dots allowed.
let s = s.trim_start();
Ok(parse_float_impl(s, true).into())
// Variant B: with no argument the result is `undefined`.
Ok(Value::Undefined)
}
}
pub fn set_interval<'gc>(

View File

@ -137,8 +137,8 @@ impl<'gc> Value<'gc> {
/// * In SWF6 and lower, `undefined` is coerced to `0.0` (like `false`)
/// rather than `NaN` as required by spec.
/// * In SWF5 and lower, hexadecimal is unsupported.
/// * In SWF4 and lower, a string is coerced using the `parseFloat` function
/// and returns `0.0` rather than `NaN` if it cannot be converted to a number.
/// * In SWF4 and lower, `0.0` is returned rather than `NaN` if a string cannot
/// be converted to a number.
fn primitive_as_number(&self, activation: &mut Activation<'_, 'gc, '_>) -> f64 {
match self {
Value::Undefined if activation.swf_version() < 7 => 0.0,
@ -459,6 +459,32 @@ impl<'gc> Value<'gc> {
}
}
/// Computes `value * 10^exp` by square-and-multiply over the bits of `|exp|`.
///
/// For a positive `exp` the value is multiplied by successive powers
/// `10^1, 10^2, 10^4, ...`; otherwise it is divided by them. Dividing (rather
/// than multiplying by a reciprocal) is deliberate, to match Flash's rounding.
fn decimal_shift(value: f64, exp: i32) -> f64 {
    let scale_up = exp > 0;
    // `unsigned_abs` keeps `i32::MIN` from overflowing on negation.
    let mut remaining = exp.unsigned_abs();
    let mut power: f64 = 10.0;
    let mut shifted = value;

    while remaining != 0 {
        if remaining & 1 == 1 {
            if scale_up {
                shifted *= power;
            } else {
                shifted /= power;
            }
        }
        remaining >>= 1;
        power *= power;
    }

    shifted
}
/// Converts an `f64` to a String with (hopefully) the same output as Flash AVM1.
/// 15 digits are displayed (not including leading 0s in a decimal <1).
/// Exponential notation is used for numbers <= 1e-5 and >= 1e15.
@ -511,31 +537,6 @@ fn f64_to_string(mut n: f64) -> Cow<'static, str> {
const LOG10_2: f64 = 0.301029995663981; // log_10(2) value (less precise than Rust's f64::LOG10_2).
let mut exp = f64::round(f64::from(exp_base2) * LOG10_2) as i32;
// Calculate `value * 10^exp` through repeated multiplication or division.
//
// Walks the bits of `|exp|`, squaring `base` each step (10, 10^2, 10^4, ...)
// and applying it whenever the current bit is set.
fn decimal_shift(mut value: f64, exp: i32) -> f64 {
    let mut base: f64 = 10.0;
    // Take the magnitude with `unsigned_abs`: plain `-exp` overflows when
    // `exp == i32::MIN`.
    let mut magnitude = exp.unsigned_abs();
    // The multiply and division branches are intentionally separate to match Flash's behavior.
    if exp > 0 {
        while magnitude > 0 {
            if (magnitude & 1) != 0 {
                value *= base;
            }
            magnitude >>= 1;
            base *= base;
        }
    } else {
        while magnitude > 0 {
            if (magnitude & 1) != 0 {
                value /= base;
            }
            magnitude >>= 1;
            base *= base;
        }
    }
    value
}
// Shift the decimal value so that it's in the range of [0.0, 10.0).
let mut mantissa: f64 = decimal_shift(n, -exp);
@ -671,51 +672,170 @@ fn f64_to_string(mut n: f64) -> Cow<'static, str> {
}
}
/// Converts a `WStr` to an f64 based on the SWF version.
fn string_to_f64(str: &WStr, swf_version: u8) -> f64 {
if swf_version < 5 {
use crate::avm1::globals::parse_float_impl;
let v = parse_float_impl(str.trim_start(), true);
if v.is_nan() {
return 0.0;
/// Consumes an optional sign character.
/// Returns whether a minus sign was consumed.
///
/// On a match, `*s` is advanced past the single `-` or `+`; when neither is
/// present, `*s` is left untouched and `false` is returned.
fn parse_sign(s: &mut &WStr) -> bool {
if let Some(after_sign) = s.strip_prefix(b'-') {
*s = after_sign;
true
} else if let Some(after_sign) = s.strip_prefix(b'+') {
// A plus sign is consumed too, but does not negate.
*s = after_sign;
false
} else {
false
}
// NOTE(review): `v` is not defined in this helper; the next line looks like a
// leftover of the pre-change `string_to_f64` body — confirm against the file.
return v;
}
/// Converts a `WStr` to an `f64`.
///
/// This function might fail for some invalid inputs, by returning `NaN`.
///
/// `strict` typically tells whether to behave like `Number()` or `parseFloat()`:
/// * `strict == true` fails on trailing garbage (like `Number()`).
/// * `strict == false` ignores trailing garbage (like `parseFloat()`).
///
/// The parse runs in two passes: the first only validates the shape
/// (`[sign] digits [. digits] [e [sign] digits]`) and derives the decimal
/// exponent of the leading digit; the second sums each digit scaled by its
/// power of ten via `decimal_shift`.
///
/// NOTE(review): this span appears to interleave lines of the removed
/// `string_to_f64` body (those referring to `str` / `swf_version`) with the new
/// function — confirm against the actual file.
pub fn parse_float_impl(mut s: &WStr, strict: bool) -> f64 {
// True only for code units that fit in a byte and are ASCII `0`-`9`.
fn is_ascii_digit(c: u16) -> bool {
u8::try_from(c).map_or(false, |c| c.is_ascii_digit())
}
if str.is_empty() {
// Allow leading whitespace.
s = s.trim_start();
// Parse sign.
let is_negative = parse_sign(&mut s);
// Remember where the digits start; the second pass re-reads from here.
let after_sign = s;
// Validate digits before decimal point.
s = s.trim_start_matches(is_ascii_digit);
// `exp` is the power of ten of the first digit: (integer digit count) - 1.
// With no integer digits it is -1, i.e. the first fractional digit's power.
let mut exp = (after_sign.len() - s.len()) as i32 - 1;
// Validate digits after decimal point.
if let Some(after_dot) = s.strip_prefix(b'.') {
s = after_dot;
s = s.trim_start_matches(is_ascii_digit);
}
// Fail if we got no digits.
// TODO: Compare by reference instead?
// NOTE(review): a lone `.` consumes one unit, so it passes this length check
// without containing any digit — confirm that is the intended Flash behavior.
if s.len() == after_sign.len() {
return f64::NAN;
}
if swf_version >= 6 {
if let Some(v) = str.strip_prefix(WStr::from_units(b"0x")) {
// Flash allows the '-' sign here.
return match Wrapping::<i32>::from_wstr_radix(v, 16) {
Ok(n) => f64::from(n.0 as i32),
Err(_) => f64::NAN,
};
} else if str.starts_with(b'0')
|| str.starts_with(WStr::from_units(b"+0"))
|| str.starts_with(WStr::from_units(b"-0"))
// Handle exponent.
if let Some(after_e) = s.strip_prefix(b"eE".as_ref()) {
s = after_e;
// Parse exponent sign.
let exponent_is_negative = parse_sign(&mut s);
// Parse exponent itself.
// Wrapping arithmetic is used so that overly long exponents wrap around
// instead of panicking on overflow.
let mut exponent: i32 = 0;
s = s.trim_start_matches(|c| {
match u8::try_from(c)
.ok()
.and_then(|c| char::from(c).to_digit(10))
{
// Flash allows the '-' sign here.
if let Ok(n) = Wrapping::<i32>::from_wstr_radix(str, 8) {
return f64::from(n.0);
Some(digit) => {
exponent = exponent.wrapping_mul(10);
exponent = exponent.wrapping_add(digit as i32);
true
}
None => false,
}
});
// Apply exponent sign.
if exponent_is_negative {
exponent = exponent.wrapping_neg();
}
// Fold the explicit exponent into the leading digit's power of ten.
exp = exp.wrapping_add(exponent);
}
// Fail if we got digits, but we're in strict mode and not at end of string.
if strict && !s.is_empty() {
return f64::NAN;
}
// Finally, calculate the result.
// Second pass: walk the digits again from `after_sign`, adding each digit
// scaled by the current power of ten, which drops by one per digit.
let mut result = 0.0;
for c in after_sign {
if let Some(digit) = u8::try_from(c)
.ok()
.and_then(|c| char::from(c).to_digit(10))
{
result += decimal_shift(digit.into(), exp);
exp = exp.wrapping_sub(1);
} else if c == b'.' as u16 {
// Allow multiple dots.
} else {
// Any other unit (e.g. the exponent marker) ends the mantissa.
break;
}
}
// Rust parses "inf", "+inf" and "infinity" into Infinity, but Flash doesn't.
// Check if the string starts with 'i' (ignoring any leading +/-).
if str
.strip_prefix(&b"+-"[..])
.unwrap_or(str)
.starts_with(&b"iI"[..])
{
f64::NAN
// Apply sign.
if is_negative {
result = -result;
}
// We shouldn't return `NaN` after a successful parsing.
// NOTE(review): `decimal_shift(0.0, exp)` with a large positive `exp` can
// evaluate `0.0 * inf`, which is `NaN` (e.g. an input like `0e999`) — verify
// this assertion cannot fire.
debug_assert!(!result.is_nan());
result
}
/// Determines which radix a numeric string should be parsed with.
///
/// A single leading `+` or `-` is ignored, then:
/// * `0x` / `0X` prefix: hexadecimal (16).
/// * `0` followed exclusively by `0..=7` digits (possibly none): octal (8).
/// * Anything else: decimal (10).
fn guess_radix(s: &WStr) -> u32 {
    // Drop the sign, if there is one.
    let unsigned = match s.strip_prefix(b"+-".as_ref()) {
        Some(rest) => rest,
        None => s,
    };

    // Without a leading zero the string can only be decimal.
    let after_zero = match unsigned.strip_prefix(b'0') {
        Some(rest) => rest,
        None => return 10,
    };

    if after_zero.starts_with(b"xX".as_ref()) {
        // Hexadecimal.
        16
    } else if after_zero
        .iter()
        .all(|unit| (u16::from(b'0')..=u16::from(b'7')).contains(&unit))
    {
        // Octal.
        8
    } else {
        // Decimal.
        10
    }
}
/// Converts a `WStr` to an `f64` based on the SWF version.
///
/// * SWF6 and above: strings that `guess_radix` classifies as hexadecimal or
///   octal are parsed as wrapping 32-bit integers; a failed parse yields `NaN`.
/// * SWF5 and above: decimal strings are parsed in strict mode.
/// * SWF4 and below: decimal strings are parsed in non-strict mode and `0.0`
///   is returned in place of `NaN`.
///
/// NOTE(review): the final `else` branch below appears to contain leftover lines
/// of the pre-change implementation (referring to `str`) — confirm against the
/// actual file.
fn string_to_f64(mut s: &WStr, swf_version: u8) -> f64 {
if swf_version >= 6 {
let radix = guess_radix(s);
// Parse hexadecimal and octal numbers as integers.
if radix != 10 {
if radix == 16 {
// Bug compatibility: Flash fails to skip a hexadecimal prefix with a sign,
// causing such strings to be parsed as `NaN`.
// The first two units are dropped unconditionally. `guess_radix` only
// reports 16 after seeing `0` followed by `x`/`X`, so at least two units
// exist. With a sign present, the sign and the `0` are dropped instead,
// leaving the `x` in place — presumably what makes the parse below fail.
s = &s[2..];
}
return match Wrapping::<i32>::from_wstr_radix(s, radix) {
Ok(result) => result.0.into(),
Err(_) => f64::NAN,
};
}
}
// Decimal path: strict parsing (trailing garbage is an error) from SWF5 on.
let strict = swf_version >= 5;
let result = parse_float_impl(s, strict);
if !strict && result.is_nan() {
// In non-strict mode, return `0.0` rather than `NaN`.
0.0
} else {
str.trim_start_matches(&b"\t\n\r "[..])
.parse()
.unwrap_or(f64::NAN)
result
}
}