swf: Parse string as Vec<u8> instead of String in SWF

Some obfuscated SWFs may have invalid strings in their constant
pool - trying to immediately parse them as a UTF-8 `String` throws
away information. Instead, we now store a `Vec<u8>`, which we
then use to construct an `AvmString` (or pass to `String::from_utf8_lossy`
for debug printing).
This commit is contained in:
Aaron Hill 2024-01-22 19:45:48 -05:00
parent 8dbcfe26f9
commit c1fce1106f
8 changed files with 44 additions and 39 deletions

View File

@ -5,6 +5,7 @@ use convert_case::{Boundary, Case, Casing};
use proc_macro2::TokenStream;
use quote::quote;
use regex::RegexBuilder;
use std::borrow::Cow;
use std::ffi::OsStr;
use std::fs;
use std::fs::File;
@ -113,9 +114,9 @@ pub fn build_playerglobal(
// Resolve the 'name' field of a `Multiname`. This only handles the cases
// that we need for our custom `playerglobal.swf`.
fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str {
fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> Cow<'a, str> {
if let Multiname::QName { name, .. } | Multiname::Multiname { name, .. } = multiname {
&abc.constant_pool.strings[name.0 as usize - 1]
String::from_utf8_lossy(&abc.constant_pool.strings[name.0 as usize - 1])
} else {
panic!("Unexpected Multiname {multiname:?}");
}
@ -124,21 +125,21 @@ fn resolve_multiname_name<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a st
// Strips off the version mark inserted by 'asc.jar',
// giving us a valid Rust module name. The actual versioning logic
// is handled in Ruffle when we load playerglobals
fn strip_version_mark(val: &str) -> &str {
fn strip_version_mark(val: Cow<'_, str>) -> Cow<'_, str> {
const MIN_API_MARK: usize = 0xE000;
const MAX_API_MARK: usize = 0xF8FF;
if let Some(chr) = val.chars().last() {
if chr as usize >= MIN_API_MARK && chr as usize <= MAX_API_MARK {
// The version mark is 3 bytes in utf-8
return &val[..val.len() - 3];
return val[..val.len() - 3].to_string().into();
}
}
val
}
// Like `resolve_multiname_name`, but for namespaces instead.
fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str {
fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> Cow<'a, str> {
let ns = match multiname {
Multiname::QName { namespace, .. } => {
&abc.constant_pool.namespaces[namespace.0 as usize - 1]
@ -168,7 +169,7 @@ fn resolve_multiname_ns<'a>(abc: &'a AbcFile, multiname: &Multiname) -> &'a str
} else {
panic!("Unexpected Namespace {ns:?}");
};
strip_version_mark(namespace)
strip_version_mark(String::from_utf8_lossy(namespace))
}
fn flash_to_rust_path(path: &str) -> String {
@ -222,7 +223,7 @@ fn rust_method_name_and_path(
// For example, a namespace of "flash.system" and a name of "Security"
// turns into the path "flash::system::security"
let multiname = &abc.constant_pool.multinames[parent.0 as usize - 1];
let ns = flash_to_rust_path(resolve_multiname_ns(abc, multiname));
let ns = flash_to_rust_path(&resolve_multiname_ns(abc, multiname));
if !ns.is_empty() {
path += &ns;
path += "::";
@ -231,10 +232,10 @@ fn rust_method_name_and_path(
flash_method_path += "::";
}
let name = resolve_multiname_name(abc, multiname);
path += &flash_to_rust_path(name);
path += &flash_to_rust_path(&name);
path += "::";
flash_method_path += name;
flash_method_path += &*name;
flash_method_path += "::";
} else {
// This is a freestanding function. Append its namespace (the package).
@ -242,9 +243,9 @@ fn rust_method_name_and_path(
// has a namespace of "flash.utils", which turns into the path
// "flash::utils"
let name = resolve_multiname_ns(abc, trait_name);
let ns = &flash_to_rust_path(name);
let ns = &flash_to_rust_path(&name);
path += ns;
flash_method_path += name;
flash_method_path += &name;
if !ns.is_empty() {
path += "::";
flash_method_path += "::";
@ -255,10 +256,10 @@ fn rust_method_name_and_path(
// name (e.g. `getDefinitionByName`)
path += prefix;
let name = resolve_multiname_name(abc, trait_name);
let name = resolve_multiname_name(abc, trait_name).to_string();
path += &flash_to_rust_path(name);
flash_method_path += name;
path += &flash_to_rust_path(&name);
flash_method_path += &name;
path += suffix;
@ -374,14 +375,15 @@ fn write_native_table(data: &[u8], out_dir: &Path) -> Result<Vec<u8>, Box<dyn st
);
let instance_allocator_method_name =
"::".to_string() + &flash_to_rust_path(class_name) + "_allocator";
"::".to_string() + &flash_to_rust_path(&class_name) + "_allocator";
let native_instance_init_method_name = "::native_instance_init".to_string();
let call_handler_method_name = "::call_handler".to_string();
for metadata_idx in &trait_.metadata {
let metadata = &abc.metadata[metadata_idx.0 as usize];
let name = &abc.constant_pool.strings[metadata.name.0 as usize - 1];
let name =
String::from_utf8_lossy(&abc.constant_pool.strings[metadata.name.0 as usize - 1]);
let is_versioning = match name.as_str() {
let is_versioning = match &*name {
RUFFLE_METADATA_NAME => false,
API_METADATA_NAME => true,
_ => panic!("Unexpected class metadata {name:?}"),
@ -389,12 +391,13 @@ fn write_native_table(data: &[u8], out_dir: &Path) -> Result<Vec<u8>, Box<dyn st
for item in &metadata.items {
let key = if item.key.0 != 0 {
Some(abc.constant_pool.strings[item.key.0 as usize - 1].as_str())
Some(&abc.constant_pool.strings[item.key.0 as usize - 1])
} else {
None
};
let value = &abc.constant_pool.strings[item.value.0 as usize - 1];
match (key, value.as_str()) {
let value =
String::from_utf8_lossy(&abc.constant_pool.strings[item.value.0 as usize - 1]);
match (key, &*value) {
// Match `[Ruffle(InstanceAllocator)]`
(None, METADATA_INSTANCE_ALLOCATOR) if !is_versioning => {
// This results in a path of the form

View File

@ -329,14 +329,14 @@ pub fn display_function<'gc>(
// SWFs with debug information will provide a method name attached
// to the method definition, so we can use that.
output.push_char('/');
output.push_utf8(method.method_name());
output.push_utf8(&method.method_name());
}
// TODO: What happens if we can't find the trait?
}
// We purposely do nothing for instance initializers
} else if method.is_function && !method.method_name().is_empty() {
output.push_utf8("Function/");
output.push_utf8(method.method_name());
output.push_utf8(&method.method_name());
} else {
output.push_utf8("MethodInfo-");
output.push_utf8(&method.abc_method.to_string());

View File

@ -11,6 +11,7 @@ use crate::tag_utils::SwfMovie;
use gc_arena::barrier::unlock;
use gc_arena::lock::Lock;
use gc_arena::{Collect, Gc, Mutation};
use std::borrow::Cow;
use std::fmt;
use std::ops::Deref;
use std::rc::Rc;
@ -223,18 +224,18 @@ impl<'gc> BytecodeMethod<'gc> {
}
/// Get the name of this method.
pub fn method_name(&self) -> &str {
pub fn method_name(&self) -> Cow<'_, str> {
let name_index = self.method().name.0 as usize;
if name_index == 0 {
return "";
return Cow::Borrowed("");
}
self.abc
.constant_pool
.strings
.get(name_index - 1)
.map(|s| s.as_str())
.unwrap_or("")
.map(|s| String::from_utf8_lossy(s))
.unwrap_or(Cow::Borrowed(""))
}
/// Determine if a given method is variadic.

View File

@ -286,7 +286,7 @@ impl<'gc> TranslationUnit<'gc> {
}
let raw = if string_index == 0 {
""
&[]
} else {
write
.abc
@ -294,11 +294,12 @@ impl<'gc> TranslationUnit<'gc> {
.strings
.get(string_index as usize - 1)
.ok_or_else(|| format!("Unknown string constant {string_index}"))?
.as_slice()
};
let atom = context
.interner
.intern_wstr(context.gc_context, ruffle_wstr::from_utf8(raw));
.intern_wstr(context.gc_context, ruffle_wstr::from_utf8_bytes(raw));
write.strings[string_index as usize] = Some(atom);
Ok(atom)

View File

@ -99,11 +99,11 @@ impl<'a> Reader<'a> {
Ok(self.read_encoded_u32()? as i32)
}
fn read_string(&mut self) -> Result<String> {
fn read_string(&mut self) -> Result<Vec<u8>> {
let len = self.read_u30()?;
// TODO: Avoid allocating a String.
let mut s = String::with_capacity(len as usize);
self.read_slice(len as usize)?.read_to_string(&mut s)?;
let mut s = Vec::with_capacity(len as usize);
self.read_slice(len as usize)?.read_to_end(&mut s)?;
Ok(s)
}

View File

@ -21,7 +21,7 @@ pub struct ConstantPool {
pub ints: Vec<i32>,
pub uints: Vec<u32>,
pub doubles: Vec<f64>,
pub strings: Vec<String>,
pub strings: Vec<Vec<u8>>,
pub namespaces: Vec<Namespace>,
pub namespace_sets: Vec<NamespaceSet>,
pub multinames: Vec<Multiname>,

View File

@ -139,9 +139,9 @@ impl<W: Write> Writer<W> {
self.write_u30(i.0)
}
fn write_string(&mut self, s: &str) -> Result<()> {
fn write_string(&mut self, s: &[u8]) -> Result<()> {
self.write_u30(s.len() as u32)?;
self.output.write_all(s.as_bytes())?;
self.output.write_all(s)?;
Ok(())
}

View File

@ -2659,11 +2659,11 @@ pub fn avm2_tests() -> Vec<Avm2TestData> {
uints: vec![],
doubles: vec![],
strings: vec![
"".to_string(),
"void".to_string(),
"Avm2Test".to_string(),
"trace".to_string(),
"Test".to_string(),
"".to_string().into_bytes(),
"void".to_string().into_bytes(),
"Avm2Test".to_string().into_bytes(),
"trace".to_string().into_bytes(),
"Test".to_string().into_bytes(),
],
namespaces: vec![Namespace::Package(Index::new(1))],
namespace_sets: vec![],