From f0db6d8c6064815d14c3f12261e3548422a47fed Mon Sep 17 00:00:00 2001 From: relrelb Date: Sat, 25 Jun 2022 16:47:51 +0300 Subject: [PATCH] avm1: Correct `parseFloat()` Rewrite the implementation of `string_to_f64()` to match Flash behavior. This affects `parseFloat()` as well as any `Value` to `f64` coercion. --- core/src/avm1/globals.rs | 69 +---------- core/src/avm1/value.rs | 244 +++++++++++++++++++++++++++++---------- 2 files changed, 187 insertions(+), 126 deletions(-) diff --git a/core/src/avm1/globals.rs b/core/src/avm1/globals.rs index 06a0e8c9f..091d5feca 100644 --- a/core/src/avm1/globals.rs +++ b/core/src/avm1/globals.rs @@ -254,76 +254,17 @@ pub fn get_nan<'gc>( } } -pub fn parse_float_impl(s: &WStr, allow_multiple_dots: bool) -> f64 { - let mut out_str = String::with_capacity(s.len()); - - // TODO: Implementing this in a very janky way for now, - // feeding the string to Rust's float parser. - // Flash's parser is much more lenient, so we have to massage - // the string into an acceptable format. - let mut allow_dot = true; - let mut allow_exp = true; - let mut allow_sign = true; - for unit in s.iter() { - let c = match u8::try_from(unit) { - Ok(c) => c, - // Invalid char, `parseFloat` ignores all trailing garbage. - Err(_) => break, - }; - - match c { - b'0'..=b'9' => { - allow_sign = false; - out_str.push(c.into()); - } - b'+' | b'-' if allow_sign => { - // Sign allowed at first char and following e - allow_sign = false; - out_str.push(c.into()); - } - b'.' if allow_exp => { - allow_sign = false; - if allow_dot { - allow_dot = false; - out_str.push(c.into()); - } else { - // AVM1 allows multiple . except after e - if allow_multiple_dots { - allow_exp = false; - } else { - break; - } - } - } - b'e' | b'E' if allow_exp => { - allow_sign = true; - allow_exp = false; - allow_dot = false; - out_str.push(c.into()); - } - - // Invalid char, `parseFloat` ignores all trailing garbage. - _ => break, - }; - } - - out_str.parse::().unwrap_or(f64::NAN) -} - pub fn parse_float<'gc>( activation: &mut Activation<'_, 'gc, '_>, _this: Object<'gc>, args: &[Value<'gc>], ) -> Result, Error<'gc>> { - let s = if let Some(val) = args.get(0) { - val.coerce_to_string(activation)? + if let Some(value) = args.get(0) { + let string = value.coerce_to_string(activation)?; + Ok(crate::avm1::value::parse_float_impl(&string, false).into()) } else { - return Ok(f64::NAN.into()); - }; - - let s = s.trim_start(); - - Ok(parse_float_impl(s, true).into()) + Ok(Value::Undefined) + } } pub fn set_interval<'gc>( diff --git a/core/src/avm1/value.rs b/core/src/avm1/value.rs index cf3475aa8..70efa2fb4 100644 --- a/core/src/avm1/value.rs +++ b/core/src/avm1/value.rs @@ -137,8 +137,8 @@ impl<'gc> Value<'gc> { /// * In SWF6 and lower, `undefined` is coerced to `0.0` (like `false`) /// rather than `NaN` as required by spec. /// * In SWF5 and lower, hexadecimal is unsupported. - /// * In SWF4 and lower, a string is coerced using the `parseFloat` function - /// and returns `0.0` rather than `NaN` if it cannot be converted to a number. + /// * In SWF4 and lower, `0.0` is returned rather than `NaN` if a string cannot + /// be converted to a number. fn primitive_as_number(&self, activation: &mut Activation<'_, 'gc, '_>) -> f64 { match self { Value::Undefined if activation.swf_version() < 7 => 0.0, @@ -459,6 +459,32 @@ impl<'gc> Value<'gc> { } } +/// Calculate `value * 10^exp` through repeated multiplication or division. +fn decimal_shift(mut value: f64, mut exp: i32) -> f64 { + let mut base: f64 = 10.0; + // The multiply and division branches are intentionally separate to match Flash's behavior. + if exp > 0 { + while exp > 0 { + if (exp & 1) != 0 { + value *= base; + } + exp >>= 1; + base *= base; + } + } else { + // Avoid overflow when `exp == i32::MIN`. + let mut exp = exp.unsigned_abs(); + while exp > 0 { + if (exp & 1) != 0 { + value /= base; + } + exp >>= 1; + base *= base; + } + }; + value +} + /// Converts an `f64` to a String with (hopefully) the same output as Flash AVM1. /// 15 digits are displayed (not including leading 0s in a decimal <1). /// Exponential notation is used for numbers <= 1e-5 and >= 1e15. @@ -511,31 +537,6 @@ fn f64_to_string(mut n: f64) -> Cow<'static, str> { const LOG10_2: f64 = 0.301029995663981; // log_10(2) value (less precise than Rust's f64::LOG10_2). let mut exp = f64::round(f64::from(exp_base2) * LOG10_2) as i32; - // Calculate `value * 10^exp` through repeated multiplication or division. - fn decimal_shift(mut value: f64, mut exp: i32) -> f64 { - let mut base: f64 = 10.0; - // The multiply and division branches are intentionally separate to match Flash's behavior. - if exp > 0 { - while exp > 0 { - if (exp & 1) != 0 { - value *= base; - } - exp >>= 1; - base *= base; - } - } else { - exp = -exp; - while exp > 0 { - if (exp & 1) != 0 { - value /= base; - } - exp >>= 1; - base *= base; - } - }; - value - } - // Shift the decimal value so that it's in the range of [0.0, 10.0). let mut mantissa: f64 = decimal_shift(n, -exp); @@ -671,51 +672,170 @@ fn f64_to_string(mut n: f64) -> Cow<'static, str> { } } -/// Converts a `WStr` to an f64 based on the SWF version. -fn string_to_f64(str: &WStr, swf_version: u8) -> f64 { - if swf_version < 5 { - use crate::avm1::globals::parse_float_impl; - let v = parse_float_impl(str.trim_start(), true); - if v.is_nan() { - return 0.0; - } - return v; +/// Consumes an optional sign character. +/// Returns whether a minus sign was consumed. +fn parse_sign(s: &mut &WStr) -> bool { + if let Some(after_sign) = s.strip_prefix(b'-') { + *s = after_sign; + true + } else if let Some(after_sign) = s.strip_prefix(b'+') { + *s = after_sign; + false + } else { + false + } +} + +/// Converts a `WStr` to an `f64`. +/// +/// This function might fail for some invalid inputs, by returning `NaN`. +/// +/// `strict` typically tells whether to behave like `Number()` or `parseFloat()`: +/// * `strict == true` fails on trailing garbage (like `Number()`). +/// * `strict == false` ignores trailing garbage (like `parseFloat()`). +pub fn parse_float_impl(mut s: &WStr, strict: bool) -> f64 { + fn is_ascii_digit(c: u16) -> bool { + u8::try_from(c).map_or(false, |c| c.is_ascii_digit()) } - if str.is_empty() { + // Allow leading whitespace. + s = s.trim_start(); + + // Parse sign. + let is_negative = parse_sign(&mut s); + let after_sign = s; + + // Validate digits before decimal point. + s = s.trim_start_matches(is_ascii_digit); + let mut exp = (after_sign.len() - s.len()) as i32 - 1; + + // Validate digits after decimal point. + if let Some(after_dot) = s.strip_prefix(b'.') { + s = after_dot; + s = s.trim_start_matches(is_ascii_digit); + } + + // Fail if we got no digits. + // TODO: Compare by reference instead? + if s.len() == after_sign.len() { return f64::NAN; } - if swf_version >= 6 { - if let Some(v) = str.strip_prefix(WStr::from_units(b"0x")) { - // Flash allows the '-' sign here. - return match Wrapping::::from_wstr_radix(v, 16) { - Ok(n) => f64::from(n.0 as i32), - Err(_) => f64::NAN, - }; - } else if str.starts_with(b'0') - || str.starts_with(WStr::from_units(b"+0")) - || str.starts_with(WStr::from_units(b"-0")) - { - // Flash allows the '-' sign here. - if let Ok(n) = Wrapping::::from_wstr_radix(str, 8) { - return f64::from(n.0); + // Handle exponent. + if let Some(after_e) = s.strip_prefix(b"eE".as_ref()) { + s = after_e; + + // Parse exponent sign. + let exponent_is_negative = parse_sign(&mut s); + + // Parse exponent itself. + let mut exponent: i32 = 0; + s = s.trim_start_matches(|c| { + match u8::try_from(c) + .ok() + .and_then(|c| char::from(c).to_digit(10)) + { + Some(digit) => { + exponent = exponent.wrapping_mul(10); + exponent = exponent.wrapping_add(digit as i32); + true + } + None => false, } + }); + + // Apply exponent sign. + if exponent_is_negative { + exponent = exponent.wrapping_neg(); + } + + exp = exp.wrapping_add(exponent); + } + + // Fail if we got digits, but we're in strict mode and not at end of string. + if strict && !s.is_empty() { + return f64::NAN; + } + + // Finally, calculate the result. + let mut result = 0.0; + for c in after_sign { + if let Some(digit) = u8::try_from(c) + .ok() + .and_then(|c| char::from(c).to_digit(10)) + { + result += decimal_shift(digit.into(), exp); + exp = exp.wrapping_sub(1); + } else if c == b'.' as u16 { + // Allow multiple dots. + } else { + break; } } - // Rust parses "inf", "+inf" and "infinity" into Infinity, but Flash doesn't. - // Check if the string starts with 'i' (ignoring any leading +/-). - if str - .strip_prefix(&b"+-"[..]) - .unwrap_or(str) - .starts_with(&b"iI"[..]) - { - f64::NAN + // Apply sign. + if is_negative { + result = -result; + } + + // We shouldn't return `NaN` after a successful parsing. + debug_assert!(!result.is_nan()); + result +} + +/// Guess the radix of a string. +/// +/// With an optional leading sign omitted: +/// * Strings that start with `0x` (case insensitive) are considered hexadecimal. +/// * Strings that start with a `0` and consist only of `0..=7` digits are considered octal. +/// * All other strings are considered decimal. +fn guess_radix(s: &WStr) -> u32 { + // Optionally skip sign. + let s = s.strip_prefix(b"+-".as_ref()).unwrap_or(s); + + if let Some(s) = s.strip_prefix(b'0') { + if s.starts_with(b"xX".as_ref()) { + // Hexadecimal. + return 16; + } + + if s.iter().all(|c| c >= b'0' as u16 && c <= b'7' as u16) { + // Octal. + return 8; + } + } + + // Decimal. + 10 +} + +/// Converts a `WStr` to an `f64` based on the SWF version. +fn string_to_f64(mut s: &WStr, swf_version: u8) -> f64 { + if swf_version >= 6 { + let radix = guess_radix(s); + + // Parse hexadecimal and octal numbers as integers. + if radix != 10 { + if radix == 16 { + // Bug compatibility: Flash fails to skip an hexadecimal prefix with a sign, + // causing such strings to be parsed as `NaN`. + s = &s[2..]; + } + + return match Wrapping::::from_wstr_radix(s, radix) { + Ok(result) => result.0.into(), + Err(_) => f64::NAN, + }; + } + } + + let strict = swf_version >= 5; + let result = parse_float_impl(s, strict); + if !strict && result.is_nan() { + // In non-strict mode, return `0.0` rather than `NaN`. + 0.0 } else { - str.trim_start_matches(&b"\t\n\r "[..]) - .parse() - .unwrap_or(f64::NAN) + result } }