From c1fce1106fc3f48e29aef2ca19aa8df4065784ba Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Mon, 22 Jan 2024 19:45:48 -0500 Subject: [PATCH] swf: Parse string as Vec<u8> instead of String in SWF Some obfuscated SWFs may have invalid strings in their constant pool - trying to immediately parse them as a utf-8 String throws away information. Instead, we now store a `Vec<u8>`, which we then use to construct an `AvmString` (or pass to `String::from_utf8_lossy` for debug printing). --- core/build_playerglobal/src/lib.rs | 43 ++++++++++++++++-------------- core/src/avm2/function.rs | 4 +-- core/src/avm2/method.rs | 9 ++++--- core/src/avm2/script.rs | 5 ++-- swf/src/avm2/read.rs | 6 ++--- swf/src/avm2/types.rs | 2 +- swf/src/avm2/write.rs | 4 +-- swf/src/test_data.rs | 10 +++---- 8 files changed, 44 insertions(+), 39 deletions(-) diff --git a/core/build_playerglobal/src/lib.rs b/core/build_playerglobal/src/lib.rs index d2690876c..7e6721eac 100644 --- a/core/build_playerglobal/src/lib.rs +++ b/core/build_playerglobal/src/lib.rs @@ -5,6 +5,7 @@ use convert_case::{Boundary, Case, Casing}; use proc_macro2::TokenStream; use quote::quote; use regex::RegexBuilder; +use std::borrow::Cow; use std::ffi::OsStr; use std::fs; use std::fs::File; @@ -113,9 +114,9 @@ pub fn build_playerglobal( // Resolve the 'name' field of a `Multiname`. This only handles the cases // that we need for our custom `playerglobal.swf` ( -fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str { +fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> Cow<'a, str> { if let Multiname::QName { name, .. } | Multiname::Multiname { name, .. 
} = multiname { - &abc.constant_pool.strings[name.0 as usize - 1] + String::from_utf8_lossy(&abc.constant_pool.strings[name.0 as usize - 1]) } else { panic!("Unexpected Multiname {multiname:?}"); } @@ -124,21 +125,21 @@ fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a st // Strips off the version mark inserted by 'asc.jar', // giving us a valid Rust module name. The actual versioning logic // is handling in Ruffle when we load playerglobals -fn strip_version_mark(val: &str) -> &str { +fn strip_version_mark(val: Cow<'_, str>) -> Cow<'_, str> { const MIN_API_MARK: usize = 0xE000; const MAX_API_MARK: usize = 0xF8FF; if let Some(chr) = val.chars().last() { if chr as usize >= MIN_API_MARK && chr as usize <= MAX_API_MARK { // The version mark is 3 bytes in utf-8 - return &val[..val.len() - 3]; + return val[..val.len() - 3].to_string().into(); } } val } // Like `resolve_multiname_name`, but for namespaces instead. -fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str { +fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> Cow<'a, str> { let ns = match multiname { Multiname::QName { namespace, .. 
} => { &abc.constant_pool.namespaces[namespace.0 as usize - 1] @@ -168,7 +169,7 @@ fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str } else { panic!("Unexpected Namespace {ns:?}"); }; - strip_version_mark(namespace) + strip_version_mark(String::from_utf8_lossy(namespace)) } fn flash_to_rust_path(path: &str) -> String { @@ -222,7 +223,7 @@ fn rust_method_name_and_path( // For example, a namespace of "flash.system" and a name of "Security" // turns into the path "flash::system::security" let multiname = &abc.constant_pool.multinames[parent.0 as usize - 1]; - let ns = flash_to_rust_path(resolve_multiname_ns(abc, multiname)); + let ns = flash_to_rust_path(&resolve_multiname_ns(abc, multiname)); if !ns.is_empty() { path += &ns; path += "::"; @@ -231,10 +232,10 @@ fn rust_method_name_and_path( flash_method_path += "::"; } let name = resolve_multiname_name(abc, multiname); - path += &flash_to_rust_path(name); + path += &flash_to_rust_path(&name); path += "::"; - flash_method_path += name; + flash_method_path += &*name; flash_method_path += "::"; } else { // This is a freestanding function. Append its namespace (the package). @@ -242,9 +243,9 @@ fn rust_method_name_and_path( // has a namespace of "flash.utils", which turns into the path // "flash::utils" let name = resolve_multiname_ns(abc, trait_name); - let ns = &flash_to_rust_path(name); + let ns = &flash_to_rust_path(&name); path += ns; - flash_method_path += name; + flash_method_path += &name; if !ns.is_empty() { path += "::"; flash_method_path += "::"; @@ -255,10 +256,10 @@ fn rust_method_name_and_path( // name (e.g. 
`getDefinitionByName`) path += prefix; - let name = resolve_multiname_name(abc, trait_name); + let name = resolve_multiname_name(abc, trait_name).to_string(); - path += &flash_to_rust_path(name); - flash_method_path += name; + path += &flash_to_rust_path(&name); + flash_method_path += &name; path += suffix; @@ -374,14 +375,15 @@ fn write_native_table(data: &[u8], out_dir: &Path) -> Result, Box false, API_METADATA_NAME => true, _ => panic!("Unexpected class metadata {name:?}"), @@ -389,12 +391,13 @@ fn write_native_table(data: &[u8], out_dir: &Path) -> Result, Box { // This results in a path of the form diff --git a/core/src/avm2/function.rs b/core/src/avm2/function.rs index 5dc0fe6ca..c17f43dcb 100644 --- a/core/src/avm2/function.rs +++ b/core/src/avm2/function.rs @@ -329,14 +329,14 @@ pub fn display_function<'gc>( // SWF's with debug information will provide a method name attached // to the method definition, so we can use that. output.push_char('/'); - output.push_utf8(method.method_name()); + output.push_utf8(&method.method_name()); } // TODO: What happens if we can't find the trait? } // We purposely do nothing for instance initializers } else if method.is_function && !method.method_name().is_empty() { output.push_utf8("Function/"); - output.push_utf8(method.method_name()); + output.push_utf8(&method.method_name()); } else { output.push_utf8("MethodInfo-"); output.push_utf8(&method.abc_method.to_string()); diff --git a/core/src/avm2/method.rs b/core/src/avm2/method.rs index df550b0e9..ebd740664 100644 --- a/core/src/avm2/method.rs +++ b/core/src/avm2/method.rs @@ -11,6 +11,7 @@ use crate::tag_utils::SwfMovie; use gc_arena::barrier::unlock; use gc_arena::lock::Lock; use gc_arena::{Collect, Gc, Mutation}; +use std::borrow::Cow; use std::fmt; use std::ops::Deref; use std::rc::Rc; @@ -223,18 +224,18 @@ impl<'gc> BytecodeMethod<'gc> { } /// Get the name of this method. 
- pub fn method_name(&self) -> &str { + pub fn method_name(&self) -> Cow<'_, str> { let name_index = self.method().name.0 as usize; if name_index == 0 { - return ""; + return Cow::Borrowed(""); } self.abc .constant_pool .strings .get(name_index - 1) - .map(|s| s.as_str()) - .unwrap_or("") + .map(|s| String::from_utf8_lossy(s)) + .unwrap_or(Cow::Borrowed("")) } /// Determine if a given method is variadic. diff --git a/core/src/avm2/script.rs b/core/src/avm2/script.rs index 8951313f0..9fc03bbb9 100644 --- a/core/src/avm2/script.rs +++ b/core/src/avm2/script.rs @@ -286,7 +286,7 @@ impl<'gc> TranslationUnit<'gc> { } let raw = if string_index == 0 { - "" + &[] } else { write .abc @@ -294,11 +294,12 @@ impl<'gc> TranslationUnit<'gc> { .strings .get(string_index as usize - 1) .ok_or_else(|| format!("Unknown string constant {string_index}"))? + .as_slice() }; let atom = context .interner - .intern_wstr(context.gc_context, ruffle_wstr::from_utf8(raw)); + .intern_wstr(context.gc_context, ruffle_wstr::from_utf8_bytes(raw)); write.strings[string_index as usize] = Some(atom); Ok(atom) diff --git a/swf/src/avm2/read.rs b/swf/src/avm2/read.rs index dffea182d..ee4a926ee 100644 --- a/swf/src/avm2/read.rs +++ b/swf/src/avm2/read.rs @@ -99,11 +99,11 @@ impl<'a> Reader<'a> { Ok(self.read_encoded_u32()? as i32) } - fn read_string(&mut self) -> Result { + fn read_string(&mut self) -> Result> { let len = self.read_u30()?; // TODO: Avoid allocating a String. 
- let mut s = String::with_capacity(len as usize); - self.read_slice(len as usize)?.read_to_string(&mut s)?; + let mut s = Vec::with_capacity(len as usize); + self.read_slice(len as usize)?.read_to_end(&mut s)?; Ok(s) } diff --git a/swf/src/avm2/types.rs b/swf/src/avm2/types.rs index 37ab78310..34a6b9857 100644 --- a/swf/src/avm2/types.rs +++ b/swf/src/avm2/types.rs @@ -21,7 +21,7 @@ pub struct ConstantPool { pub ints: Vec, pub uints: Vec, pub doubles: Vec, - pub strings: Vec, + pub strings: Vec>, pub namespaces: Vec, pub namespace_sets: Vec, pub multinames: Vec, diff --git a/swf/src/avm2/write.rs b/swf/src/avm2/write.rs index 04bb0f717..2d3232e33 100644 --- a/swf/src/avm2/write.rs +++ b/swf/src/avm2/write.rs @@ -139,9 +139,9 @@ impl Writer { self.write_u30(i.0) } - fn write_string(&mut self, s: &str) -> Result<()> { + fn write_string(&mut self, s: &[u8]) -> Result<()> { self.write_u30(s.len() as u32)?; - self.output.write_all(s.as_bytes())?; + self.output.write_all(s)?; Ok(()) } diff --git a/swf/src/test_data.rs b/swf/src/test_data.rs index 42c325c6e..307cae450 100644 --- a/swf/src/test_data.rs +++ b/swf/src/test_data.rs @@ -2659,11 +2659,11 @@ pub fn avm2_tests() -> Vec { uints: vec![], doubles: vec![], strings: vec![ - "".to_string(), - "void".to_string(), - "Avm2Test".to_string(), - "trace".to_string(), - "Test".to_string(), + "".to_string().into_bytes(), + "void".to_string().into_bytes(), + "Avm2Test".to_string().into_bytes(), + "trace".to_string().into_bytes(), + "Test".to_string().into_bytes(), ], namespaces: vec![Namespace::Package(Index::new(1))], namespace_sets: vec![],