swf: Parse string as Vec<u8> instead of String in SWF
Some obfuscated SWFs may have invalid strings in their constant pool; trying to immediately parse them as a UTF-8 `String` throws away information. Instead, we now store a `Vec<u8>`, which we then use to construct an `AvmString` (or convert with `String::from_utf8_lossy` for debug printing).
This commit is contained in:
parent
8dbcfe26f9
commit
c1fce1106f
|
@ -5,6 +5,7 @@ use convert_case::{Boundary, Case, Casing};
|
|||
use proc_macro2::TokenStream;
|
||||
use quote::quote;
|
||||
use regex::RegexBuilder;
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
|
@ -113,9 +114,9 @@ pub fn build_playerglobal(
|
|||
|
||||
// Resolve the 'name' field of a `Multiname`. This only handles the cases
|
||||
// that we need for our custom `playerglobal.swf` (
|
||||
fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str {
|
||||
fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> Cow<'a, str> {
|
||||
if let Multiname::QName { name, .. } | Multiname::Multiname { name, .. } = multiname {
|
||||
&abc.constant_pool.strings[name.0 as usize - 1]
|
||||
String::from_utf8_lossy(&abc.constant_pool.strings[name.0 as usize - 1])
|
||||
} else {
|
||||
panic!("Unexpected Multiname {multiname:?}");
|
||||
}
|
||||
|
@ -124,21 +125,21 @@ fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a st
|
|||
// Strips off the version mark inserted by 'asc.jar',
|
||||
// giving us a valid Rust module name. The actual versioning logic
|
||||
// is handled in Ruffle when we load playerglobals
|
||||
fn strip_version_mark(val: &str) -> &str {
|
||||
fn strip_version_mark(val: Cow<'_, str>) -> Cow<'_, str> {
|
||||
const MIN_API_MARK: usize = 0xE000;
|
||||
const MAX_API_MARK: usize = 0xF8FF;
|
||||
|
||||
if let Some(chr) = val.chars().last() {
|
||||
if chr as usize >= MIN_API_MARK && chr as usize <= MAX_API_MARK {
|
||||
// The version mark is 3 bytes in utf-8
|
||||
return &val[..val.len() - 3];
|
||||
return val[..val.len() - 3].to_string().into();
|
||||
}
|
||||
}
|
||||
val
|
||||
}
|
||||
|
||||
// Like `resolve_multiname_name`, but for namespaces instead.
|
||||
fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str {
|
||||
fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> Cow<'a, str> {
|
||||
let ns = match multiname {
|
||||
Multiname::QName { namespace, .. } => {
|
||||
&abc.constant_pool.namespaces[namespace.0 as usize - 1]
|
||||
|
@ -168,7 +169,7 @@ fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str
|
|||
} else {
|
||||
panic!("Unexpected Namespace {ns:?}");
|
||||
};
|
||||
strip_version_mark(namespace)
|
||||
strip_version_mark(String::from_utf8_lossy(namespace))
|
||||
}
|
||||
|
||||
fn flash_to_rust_path(path: &str) -> String {
|
||||
|
@ -222,7 +223,7 @@ fn rust_method_name_and_path(
|
|||
// For example, a namespace of "flash.system" and a name of "Security"
|
||||
// turns into the path "flash::system::security"
|
||||
let multiname = &abc.constant_pool.multinames[parent.0 as usize - 1];
|
||||
let ns = flash_to_rust_path(resolve_multiname_ns(abc, multiname));
|
||||
let ns = flash_to_rust_path(&resolve_multiname_ns(abc, multiname));
|
||||
if !ns.is_empty() {
|
||||
path += &ns;
|
||||
path += "::";
|
||||
|
@ -231,10 +232,10 @@ fn rust_method_name_and_path(
|
|||
flash_method_path += "::";
|
||||
}
|
||||
let name = resolve_multiname_name(abc, multiname);
|
||||
path += &flash_to_rust_path(name);
|
||||
path += &flash_to_rust_path(&name);
|
||||
path += "::";
|
||||
|
||||
flash_method_path += name;
|
||||
flash_method_path += &*name;
|
||||
flash_method_path += "::";
|
||||
} else {
|
||||
// This is a freestanding function. Append its namespace (the package).
|
||||
|
@ -242,9 +243,9 @@ fn rust_method_name_and_path(
|
|||
// has a namespace of "flash.utils", which turns into the path
|
||||
// "flash::utils"
|
||||
let name = resolve_multiname_ns(abc, trait_name);
|
||||
let ns = &flash_to_rust_path(name);
|
||||
let ns = &flash_to_rust_path(&name);
|
||||
path += ns;
|
||||
flash_method_path += name;
|
||||
flash_method_path += &name;
|
||||
if !ns.is_empty() {
|
||||
path += "::";
|
||||
flash_method_path += "::";
|
||||
|
@ -255,10 +256,10 @@ fn rust_method_name_and_path(
|
|||
// name (e.g. `getDefinitionByName`)
|
||||
path += prefix;
|
||||
|
||||
let name = resolve_multiname_name(abc, trait_name);
|
||||
let name = resolve_multiname_name(abc, trait_name).to_string();
|
||||
|
||||
path += &flash_to_rust_path(name);
|
||||
flash_method_path += name;
|
||||
path += &flash_to_rust_path(&name);
|
||||
flash_method_path += &name;
|
||||
|
||||
path += suffix;
|
||||
|
||||
|
@ -374,14 +375,15 @@ fn write_native_table(data: &[u8], out_dir: &Path) -> Result<Vec<u8>, Box<dyn st
|
|||
);
|
||||
|
||||
let instance_allocator_method_name =
|
||||
"::".to_string() + &flash_to_rust_path(class_name) + "_allocator";
|
||||
"::".to_string() + &flash_to_rust_path(&class_name) + "_allocator";
|
||||
let native_instance_init_method_name = "::native_instance_init".to_string();
|
||||
let call_handler_method_name = "::call_handler".to_string();
|
||||
for metadata_idx in &trait_.metadata {
|
||||
let metadata = &abc.metadata[metadata_idx.0 as usize];
|
||||
let name = &abc.constant_pool.strings[metadata.name.0 as usize - 1];
|
||||
let name =
|
||||
String::from_utf8_lossy(&abc.constant_pool.strings[metadata.name.0 as usize - 1]);
|
||||
|
||||
let is_versioning = match name.as_str() {
|
||||
let is_versioning = match &*name {
|
||||
RUFFLE_METADATA_NAME => false,
|
||||
API_METADATA_NAME => true,
|
||||
_ => panic!("Unexpected class metadata {name:?}"),
|
||||
|
@ -389,12 +391,13 @@ fn write_native_table(data: &[u8], out_dir: &Path) -> Result<Vec<u8>, Box<dyn st
|
|||
|
||||
for item in &metadata.items {
|
||||
let key = if item.key.0 != 0 {
|
||||
Some(abc.constant_pool.strings[item.key.0 as usize - 1].as_str())
|
||||
Some(&abc.constant_pool.strings[item.key.0 as usize - 1])
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let value = &abc.constant_pool.strings[item.value.0 as usize - 1];
|
||||
match (key, value.as_str()) {
|
||||
let value =
|
||||
String::from_utf8_lossy(&abc.constant_pool.strings[item.value.0 as usize - 1]);
|
||||
match (key, &*value) {
|
||||
// Match `[Ruffle(InstanceAllocator)]`
|
||||
(None, METADATA_INSTANCE_ALLOCATOR) if !is_versioning => {
|
||||
// This results in a path of the form
|
||||
|
|
|
@ -329,14 +329,14 @@ pub fn display_function<'gc>(
|
|||
// SWFs with debug information will provide a method name attached
|
||||
// to the method definition, so we can use that.
|
||||
output.push_char('/');
|
||||
output.push_utf8(method.method_name());
|
||||
output.push_utf8(&method.method_name());
|
||||
}
|
||||
// TODO: What happens if we can't find the trait?
|
||||
}
|
||||
// We purposely do nothing for instance initializers
|
||||
} else if method.is_function && !method.method_name().is_empty() {
|
||||
output.push_utf8("Function/");
|
||||
output.push_utf8(method.method_name());
|
||||
output.push_utf8(&method.method_name());
|
||||
} else {
|
||||
output.push_utf8("MethodInfo-");
|
||||
output.push_utf8(&method.abc_method.to_string());
|
||||
|
|
|
@ -11,6 +11,7 @@ use crate::tag_utils::SwfMovie;
|
|||
use gc_arena::barrier::unlock;
|
||||
use gc_arena::lock::Lock;
|
||||
use gc_arena::{Collect, Gc, Mutation};
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use std::ops::Deref;
|
||||
use std::rc::Rc;
|
||||
|
@ -223,18 +224,18 @@ impl<'gc> BytecodeMethod<'gc> {
|
|||
}
|
||||
|
||||
/// Get the name of this method.
|
||||
pub fn method_name(&self) -> &str {
|
||||
pub fn method_name(&self) -> Cow<'_, str> {
|
||||
let name_index = self.method().name.0 as usize;
|
||||
if name_index == 0 {
|
||||
return "";
|
||||
return Cow::Borrowed("");
|
||||
}
|
||||
|
||||
self.abc
|
||||
.constant_pool
|
||||
.strings
|
||||
.get(name_index - 1)
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or("")
|
||||
.map(|s| String::from_utf8_lossy(s))
|
||||
.unwrap_or(Cow::Borrowed(""))
|
||||
}
|
||||
|
||||
/// Determine if a given method is variadic.
|
||||
|
|
|
@ -286,7 +286,7 @@ impl<'gc> TranslationUnit<'gc> {
|
|||
}
|
||||
|
||||
let raw = if string_index == 0 {
|
||||
""
|
||||
&[]
|
||||
} else {
|
||||
write
|
||||
.abc
|
||||
|
@ -294,11 +294,12 @@ impl<'gc> TranslationUnit<'gc> {
|
|||
.strings
|
||||
.get(string_index as usize - 1)
|
||||
.ok_or_else(|| format!("Unknown string constant {string_index}"))?
|
||||
.as_slice()
|
||||
};
|
||||
|
||||
let atom = context
|
||||
.interner
|
||||
.intern_wstr(context.gc_context, ruffle_wstr::from_utf8(raw));
|
||||
.intern_wstr(context.gc_context, ruffle_wstr::from_utf8_bytes(raw));
|
||||
|
||||
write.strings[string_index as usize] = Some(atom);
|
||||
Ok(atom)
|
||||
|
|
|
@ -99,11 +99,11 @@ impl<'a> Reader<'a> {
|
|||
Ok(self.read_encoded_u32()? as i32)
|
||||
}
|
||||
|
||||
fn read_string(&mut self) -> Result<String> {
|
||||
fn read_string(&mut self) -> Result<Vec<u8>> {
|
||||
let len = self.read_u30()?;
|
||||
// TODO: Avoid allocating a new buffer for every string.
|
||||
let mut s = String::with_capacity(len as usize);
|
||||
self.read_slice(len as usize)?.read_to_string(&mut s)?;
|
||||
let mut s = Vec::with_capacity(len as usize);
|
||||
self.read_slice(len as usize)?.read_to_end(&mut s)?;
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ pub struct ConstantPool {
|
|||
pub ints: Vec<i32>,
|
||||
pub uints: Vec<u32>,
|
||||
pub doubles: Vec<f64>,
|
||||
pub strings: Vec<String>,
|
||||
pub strings: Vec<Vec<u8>>,
|
||||
pub namespaces: Vec<Namespace>,
|
||||
pub namespace_sets: Vec<NamespaceSet>,
|
||||
pub multinames: Vec<Multiname>,
|
||||
|
|
|
@ -139,9 +139,9 @@ impl<W: Write> Writer<W> {
|
|||
self.write_u30(i.0)
|
||||
}
|
||||
|
||||
fn write_string(&mut self, s: &str) -> Result<()> {
|
||||
fn write_string(&mut self, s: &[u8]) -> Result<()> {
|
||||
self.write_u30(s.len() as u32)?;
|
||||
self.output.write_all(s.as_bytes())?;
|
||||
self.output.write_all(s)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
@ -2659,11 +2659,11 @@ pub fn avm2_tests() -> Vec<Avm2TestData> {
|
|||
uints: vec![],
|
||||
doubles: vec![],
|
||||
strings: vec![
|
||||
"".to_string(),
|
||||
"void".to_string(),
|
||||
"Avm2Test".to_string(),
|
||||
"trace".to_string(),
|
||||
"Test".to_string(),
|
||||
"".to_string().into_bytes(),
|
||||
"void".to_string().into_bytes(),
|
||||
"Avm2Test".to_string().into_bytes(),
|
||||
"trace".to_string().into_bytes(),
|
||||
"Test".to_string().into_bytes(),
|
||||
],
|
||||
namespaces: vec![Namespace::Package(Index::new(1))],
|
||||
namespace_sets: vec![],
|
||||
|
|
Loading…
Reference in New Issue