From 66256dd3be61ea7d22561e1b952bc972acf17739 Mon Sep 17 00:00:00 2001 From: Mike Welsh Date: Wed, 20 Jan 2021 12:46:22 -0800 Subject: [PATCH] swf: SwfStr no longer contains encoding SwfStr is now an unsized slice analogous to `str` or `bstr`. The desired encoding must be supplied when converting to String. --- core/src/avm1/activation.rs | 50 +++++++---- core/src/avm1/function.rs | 20 ++++- core/src/display_object.rs | 4 +- core/src/display_object/edit_text.rs | 17 ++-- core/src/display_object/movie_clip.rs | 50 ++++++++--- core/src/font.rs | 11 +-- core/src/html/text_format.rs | 6 +- core/src/player.rs | 8 +- core/src/tag_utils.rs | 15 ++++ swf/src/avm1/read.rs | 21 +++-- swf/src/avm1/types.rs | 22 ++--- swf/src/avm1/write.rs | 2 +- swf/src/avm2/write.rs | 2 +- swf/src/read.rs | 39 ++++---- swf/src/string.rs | 122 ++++++++++++-------------- swf/src/test_data.rs | 3 +- swf/src/types.rs | 40 ++++----- swf/src/write.rs | 4 +- 18 files changed, 254 insertions(+), 182 deletions(-) diff --git a/core/src/avm1/activation.rs b/core/src/avm1/activation.rs index cd63de7d3..a4d691487 100644 --- a/core/src/avm1/activation.rs +++ b/core/src/avm1/activation.rs @@ -559,7 +559,9 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { Action::Return => self.action_return(), Action::SetMember => self.action_set_member(), Action::SetProperty => self.action_set_property(), - Action::SetTarget(target) => self.action_set_target(&target.to_str_lossy()), + Action::SetTarget(target) => { + self.action_set_target(&target.to_str_lossy(self.encoding())) + } Action::SetTarget2 => self.action_set_target2(), Action::SetVariable => self.action_set_variable(), Action::StackSwap => self.action_stack_swap(), @@ -889,13 +891,13 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { fn action_constant_pool( &mut self, - constant_pool: &[SwfStr<'_>], + constant_pool: &[&'_ SwfStr], ) -> Result, Error<'gc>> { self.context.avm1.constant_pool = GcCell::allocate( 
self.context.gc_context, constant_pool .iter() - .map(|s| (*s).to_string_lossy()) + .map(|s| (*s).to_string_lossy(self.encoding())) .collect(), ); self.set_constant_pool(self.context.avm1.constant_pool); @@ -911,11 +913,11 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { fn action_define_function( &mut self, - name: SwfStr<'_>, - params: &[SwfStr<'_>], + name: &'_ SwfStr, + params: &[&'_ SwfStr], actions: SwfSlice, ) -> Result, Error<'gc>> { - let name = name.to_str_lossy(); + let name = name.to_str_lossy(self.encoding()); let name = name.as_ref(); let swf_version = self.swf_version(); let scope = Scope::new_closure_scope(self.scope_cell(), self.context.gc_context); @@ -982,7 +984,7 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { if action_func.name.is_empty() { self.context.avm1.push(func_obj); } else { - self.define(&action_func.name.to_str_lossy(), func_obj); + self.define(&action_func.name.to_str_lossy(self.encoding()), func_obj); } Ok(FrameControl::Continue) @@ -1220,12 +1222,12 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { fn action_get_url( &mut self, - url: SwfStr<'_>, - target: SwfStr<'_>, + url: &'_ SwfStr, + target: &'_ SwfStr, ) -> Result, Error<'gc>> { - let target = target.to_str_lossy(); + let target = target.to_str_lossy(self.encoding()); let target = target.as_ref(); - let url = url.to_string_lossy(); + let url = url.to_string_lossy(self.encoding()); if target.starts_with("_level") && target.len() > 6 { match target[6..].parse::() { Ok(level_id) => { @@ -1425,13 +1427,15 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { Ok(FrameControl::Continue) } - fn action_goto_label(&mut self, label: SwfStr<'_>) -> Result, Error<'gc>> { + fn action_goto_label(&mut self, label: &'_ SwfStr) -> Result, Error<'gc>> { if let Some(clip) = self.target_clip() { if let Some(clip) = clip.as_movie_clip() { - if let Some(frame) = clip.frame_label_to_number(&label.to_str_lossy()) { + if let Some(frame) = + 
clip.frame_label_to_number(&label.to_str_lossy(self.encoding())) + { clip.goto_frame(&mut self.context, frame, true); } else { - avm_warn!(self, "GoToLabel: Frame label '{}' not found", label); + avm_warn!(self, "GoToLabel: Frame label '{:?}' not found", label); } } else { avm_warn!(self, "GoToLabel: Target is not a MovieClip"); @@ -1776,7 +1780,8 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { SwfValue::Float(v) => f64::from(*v).into(), SwfValue::Double(v) => (*v).into(), SwfValue::Str(v) => { - AvmString::new(self.context.gc_context, v.to_string_lossy()).into() + AvmString::new(self.context.gc_context, v.to_string_lossy(self.encoding())) + .into() } SwfValue::Register(v) => self.current_register(*v), SwfValue::ConstantPool(i) => { @@ -2279,9 +2284,10 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { ); match catch_vars { - CatchVar::Var(name) => { - activation.set_variable(&name.to_str_lossy(), value.to_owned())? - } + CatchVar::Var(name) => activation.set_variable( + &name.to_str_lossy(activation.encoding()), + value.to_owned(), + )?, CatchVar::Register(id) => { activation.set_current_register(*id, value.to_owned()) } @@ -2874,6 +2880,14 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { self.scope_cell().read().is_defined(self, name) } + /// Returns the suggested string encoding for actions. + /// For SWF version 6 and higher, this is always UTF-8. + /// For SWF version 5 and lower, this is locale-dependent, + /// and we default to WINDOWS-1252. + pub fn encoding(&self) -> &'static swf::Encoding { + swf::SwfStr::encoding_for_version(self.swf_version) + } + /// Returns the SWF version of the action or function being executed. 
pub fn swf_version(&self) -> u8 { self.swf_version diff --git a/core/src/avm1/function.rs b/core/src/avm1/function.rs index 0a60ff0c5..820db81f7 100644 --- a/core/src/avm1/function.rs +++ b/core/src/avm1/function.rs @@ -94,7 +94,7 @@ impl<'gc> Avm1Function<'gc> { swf_version: u8, actions: SwfSlice, name: &str, - params: &[SwfStr<'_>], + params: &[&'_ SwfStr], scope: GcCell<'gc, Scope<'gc>>, constant_pool: GcCell<'gc, Vec>, base_clip: DisplayObject<'gc>, @@ -121,7 +121,12 @@ impl<'gc> Avm1Function<'gc> { preload_global: false, params: params .iter() - .map(|&s| (None, s.to_string_lossy())) + .map(|&s| { + ( + None, + s.to_string_lossy(SwfStr::encoding_for_version(swf_version)), + ) + }) .collect(), scope, constant_pool, @@ -141,7 +146,11 @@ impl<'gc> Avm1Function<'gc> { let name = if swf_function.name.is_empty() { None } else { - Some(swf_function.name.to_string_lossy()) + Some( + swf_function + .name + .to_string_lossy(SwfStr::encoding_for_version(swf_version)), + ) }; let mut owned_params = Vec::new(); @@ -150,7 +159,10 @@ impl<'gc> Avm1Function<'gc> { register_index: r, } in &swf_function.params { - owned_params.push((*r, (*s).to_string_lossy())) + owned_params.push(( + *r, + (*s).to_string_lossy(SwfStr::encoding_for_version(swf_version)), + )) } Avm1Function { diff --git a/core/src/display_object.rs b/core/src/display_object.rs index 0e8c77e29..02b10984c 100644 --- a/core/src/display_object.rs +++ b/core/src/display_object.rs @@ -841,7 +841,9 @@ pub trait TDisplayObject<'gc>: self.set_color_transform(gc_context, &color_transform.clone().into()); } if let Some(name) = &place_object.name { - self.set_name(gc_context, &name.to_str_lossy()); + let encoding = swf::SwfStr::encoding_for_version(self.swf_version()); + let name = name.to_str_lossy(encoding); + self.set_name(gc_context, &name); } if let Some(clip_depth) = place_object.clip_depth { self.set_clip_depth(gc_context, clip_depth.into()); diff --git a/core/src/display_object/edit_text.rs 
b/core/src/display_object/edit_text.rs index 0766050fd..03b004485 100644 --- a/core/src/display_object/edit_text.rs +++ b/core/src/display_object/edit_text.rs @@ -152,11 +152,12 @@ impl<'gc> EditText<'gc> { let document = XMLDocument::new(context.gc_context); let text = swf_tag.initial_text.clone().unwrap_or_default(); let default_format = TextFormat::from_swf_tag(swf_tag.clone(), swf_movie.clone(), context); + let encoding = swf_movie.encoding(); let mut text_spans = FormatSpans::new(); text_spans.set_default_format(default_format.clone()); - let text = text.to_str_lossy(); + let text = text.to_str_lossy(encoding); if is_html { let _ = document .as_node() @@ -193,7 +194,7 @@ impl<'gc> EditText<'gc> { base.matrix_mut(context.gc_context).ty = bounds.y_min; let variable = if !swf_tag.variable_name.is_empty() { - Some(swf_tag.variable_name.clone()) + Some(swf_tag.variable_name) } else { None }; @@ -212,13 +213,15 @@ impl<'gc> EditText<'gc> { id: swf_tag.id, bounds: swf_tag.bounds, font_id: swf_tag.font_id, - font_class_name: swf_tag.font_class_name.map(|s| s.to_string_lossy()), + font_class_name: swf_tag + .font_class_name + .map(|s| s.to_string_lossy(encoding)), height: swf_tag.height, color: swf_tag.color.clone(), max_length: swf_tag.max_length, layout: swf_tag.layout.clone(), - variable_name: swf_tag.variable_name.to_string_lossy(), - initial_text: swf_tag.initial_text.map(|s| s.to_string_lossy()), + variable_name: swf_tag.variable_name.to_string_lossy(encoding), + initial_text: swf_tag.initial_text.map(|s| s.to_string_lossy(encoding)), is_word_wrap: swf_tag.is_word_wrap, is_multiline: swf_tag.is_multiline, is_password: swf_tag.is_password, @@ -247,7 +250,7 @@ impl<'gc> EditText<'gc> { intrinsic_bounds, bounds, autosize: AutoSizeMode::None, - variable: variable.map(|s| s.to_string_lossy()), + variable: variable.map(|s| s.to_string_lossy(encoding)), bound_stage_object: None, firing_variable_binding: false, selection: None, @@ -620,7 +623,7 @@ impl<'gc> EditText<'gc> 
{ .initial_text .clone() .unwrap_or_default(); - let _ = self.set_text(text.to_string(), &mut activation.context); + let _ = self.set_text(text, &mut activation.context); self.0.write(activation.context.gc_context).variable = variable; self.try_bind_text_field_variable(activation, true); diff --git a/core/src/display_object/movie_clip.rs b/core/src/display_object/movie_clip.rs index 0f64ba8bd..8f8235142 100644 --- a/core/src/display_object/movie_clip.rs +++ b/core/src/display_object/movie_clip.rs @@ -459,7 +459,7 @@ impl<'gc> MovieClip<'gc> { // giving us a `SwfSlice` for later parsing, so we have to replcate the // *entire* parsing code here. This sucks. let flags = reader.read_u32()?; - let name = reader.read_string()?.to_string_lossy(); + let name = reader.read_string()?.to_string_lossy(reader.encoding()); let is_lazy_initialize = flags & 1 != 0; let domain = library.avm2_domain(); @@ -499,7 +499,7 @@ impl<'gc> MovieClip<'gc> { for _ in 0..num_symbols { let id = reader.read_u16()?; - let class_name = reader.read_string()?.to_string_lossy(); + let class_name = reader.read_string()?.to_string_lossy(reader.encoding()); if let Some(name) = Avm2QName::from_symbol_class(&class_name, activation.context.gc_context) @@ -565,10 +565,11 @@ impl<'gc> MovieClip<'gc> { .map(|fld| fld.frame_num + 1) .unwrap_or_else(|| static_data.total_frames as u32 + 1); + let label = label.to_string_lossy(reader.encoding()); static_data.scene_labels.insert( - label.to_string(), + label.clone(), Scene { - name: label.to_string(), + name: label, start, length: end as u16 - start as u16, }, @@ -576,9 +577,10 @@ impl<'gc> MovieClip<'gc> { } for FrameLabelData { frame_num, label } in sfl_data.frame_labels { - static_data - .frame_labels - .insert(label.to_string(), frame_num as u16 + 1); + static_data.frame_labels.insert( + label.to_string_lossy(reader.encoding()), + frame_num as u16 + 1, + ); } Ok(()) @@ -2468,7 +2470,13 @@ impl<'gc, 'a> MovieClipData<'gc> { is_bold: false, is_italic: false, }; 
- let font_object = Font::from_swf_tag(context.gc_context, context.renderer, &font).unwrap(); + let font_object = Font::from_swf_tag( + context.gc_context, + context.renderer, + &font, + reader.encoding(), + ) + .unwrap(); context .library .library_for_movie_mut(self.movie()) @@ -2483,7 +2491,13 @@ impl<'gc, 'a> MovieClipData<'gc> { reader: &mut SwfStream<'a>, ) -> DecodeResult { let font = reader.read_define_font_2(2)?; - let font_object = Font::from_swf_tag(context.gc_context, context.renderer, &font).unwrap(); + let font_object = Font::from_swf_tag( + context.gc_context, + context.renderer, + &font, + reader.encoding(), + ) + .unwrap(); context .library .library_for_movie_mut(self.movie()) @@ -2498,7 +2512,13 @@ impl<'gc, 'a> MovieClipData<'gc> { reader: &mut SwfStream<'a>, ) -> DecodeResult { let font = reader.read_define_font_2(3)?; - let font_object = Font::from_swf_tag(context.gc_context, context.renderer, &font).unwrap(); + let font_object = Font::from_swf_tag( + context.gc_context, + context.renderer, + &font, + reader.encoding(), + ) + .unwrap(); context .library .library_for_movie_mut(self.movie()) @@ -2606,15 +2626,16 @@ impl<'gc, 'a> MovieClipData<'gc> { ) -> DecodeResult { let exports = reader.read_export_assets()?; for export in exports { + let name = export.name.to_str_lossy(reader.encoding()); let character = context .library .library_for_movie_mut(self.movie()) - .register_export(export.id, &export.name.to_str_lossy()); + .register_export(export.id, &name); // TODO: do other types of Character need to know their exported name? if let Some(Character::MovieClip(movie_clip)) = character { *movie_clip.0.read().static_data.exported_name.borrow_mut() = - Some(export.name.to_string()); + Some(name.to_string()); } } Ok(()) @@ -2631,7 +2652,10 @@ impl<'gc, 'a> MovieClipData<'gc> { ) -> DecodeResult { let frame_label = reader.read_frame_label(tag_len)?; // Frame labels are case insensitive (ASCII). 
- let label = frame_label.label.to_str_lossy().to_ascii_lowercase(); + let label = frame_label + .label + .to_str_lossy(reader.encoding()) + .to_ascii_lowercase(); if let std::collections::hash_map::Entry::Vacant(v) = static_data.frame_labels.entry(label) { v.insert(cur_frame); diff --git a/core/src/font.rs b/core/src/font.rs index 4e480d04d..6ab91daa2 100644 --- a/core/src/font.rs +++ b/core/src/font.rs @@ -101,6 +101,7 @@ impl<'gc> Font<'gc> { gc_context: MutationContext<'gc, '_>, renderer: &mut dyn RenderBackend, tag: &swf::Font, + encoding: &'static swf::Encoding, ) -> Result, Error> { let mut glyphs = vec![]; let mut code_point_to_glyph = fnv::FnvHashMap::default(); @@ -124,7 +125,7 @@ impl<'gc> Font<'gc> { fnv::FnvHashMap::default() }; - let descriptor = FontDescriptor::from_swf_tag(tag); + let descriptor = FontDescriptor::from_swf_tag(tag, encoding); let (ascent, descent, leading) = if let Some(layout) = &tag.layout { (layout.ascent, layout.descent, layout.leading) } else { @@ -392,12 +393,8 @@ pub struct FontDescriptor { impl FontDescriptor { /// Obtain a font descriptor from a SWF font tag. 
- pub fn from_swf_tag(val: &swf::Font) -> Self { - let mut name = val.name.to_string(); - - if let Some(first_null) = name.find('\0') { - name.truncate(first_null); - }; + pub fn from_swf_tag(val: &swf::Font, encoding: &'static swf::Encoding) -> Self { + let name = val.name.to_string_lossy(encoding); Self { name, diff --git a/core/src/html/text_format.rs b/core/src/html/text_format.rs index 0ee0dc8c6..e6511aa77 100644 --- a/core/src/html/text_format.rs +++ b/core/src/html/text_format.rs @@ -193,12 +193,12 @@ impl TextFormat { swf_movie: Arc, context: &mut UpdateContext<'_, 'gc, '_>, ) -> Self { + let encoding = swf_movie.encoding(); let movie_library = context.library.library_for_movie_mut(swf_movie); - let font = et.font_id.and_then(|fid| movie_library.get_font(fid)); let font_class = et .font_class_name - .map(|s| s.to_string_lossy()) + .map(|s| s.to_string_lossy(encoding)) .or_else(|| font.map(|font| font.descriptor().class().to_string())) .unwrap_or_else(|| "Times New Roman".to_string()); let align = et.layout.clone().map(|l| l.align); @@ -211,7 +211,7 @@ impl TextFormat { // Times New Roman non-bold, non-italic. This will need to be revised // when we start supporting device fonts. 
Self { - font: Some(font_class.to_string()), + font: Some(font_class), size: et.height.map(|h| h.to_pixels()), color: et.color, align, diff --git a/core/src/player.rs b/core/src/player.rs index 85ba4bbd4..37d6bf325 100644 --- a/core/src/player.rs +++ b/core/src/player.rs @@ -1281,8 +1281,12 @@ impl Player { renderer: &mut dyn RenderBackend, ) -> Result, Error> { let mut reader = swf::read::Reader::new(data, 8); - let device_font = - crate::font::Font::from_swf_tag(gc_context, renderer, &reader.read_define_font_2(3)?)?; + let device_font = crate::font::Font::from_swf_tag( + gc_context, + renderer, + &reader.read_define_font_2(3)?, + reader.encoding(), + )?; Ok(device_font) } diff --git a/core/src/tag_utils.rs b/core/src/tag_utils.rs index aae3c0f4f..eac1be6d7 100644 --- a/core/src/tag_utils.rs +++ b/core/src/tag_utils.rs @@ -25,6 +25,9 @@ pub struct SwfMovie { /// Any parameters provided when loading this movie (also known as 'flashvars') parameters: PropertyMap, + + /// The suggested encoding for this SWF. + encoding: &'static swf::Encoding, } impl SwfMovie { @@ -42,6 +45,7 @@ impl SwfMovie { data: vec![], url: None, parameters: PropertyMap::new(), + encoding: swf::UTF_8, } } @@ -56,6 +60,7 @@ impl SwfMovie { data, url: source.url.clone(), parameters: source.parameters.clone(), + encoding: source.encoding, } } @@ -74,11 +79,13 @@ impl SwfMovie { /// Construct a movie based on the contents of the SWF datastream. pub fn from_data(swf_data: &[u8], url: Option) -> Result { let swf_buf = swf::read::decompress_swf(&swf_data[..])?; + let encoding = swf::SwfStr::encoding_for_version(swf_buf.header.version); Ok(Self { header: swf_buf.header, data: swf_buf.data, url, parameters: PropertyMap::new(), + encoding, }) } @@ -95,6 +102,14 @@ impl SwfMovie { &self.data } + /// Returns the suggested string encoding for this SWF. + /// For SWF version 6 and higher, this is always UTF-8. 
+ /// For SWF version 5 and lower, this is locale-dependent, + /// and we default to WINDOWS-1252. + pub fn encoding(&self) -> &'static swf::Encoding { + self.encoding + } + pub fn width(&self) -> u32 { (self.header.stage_size.x_max - self.header.stage_size.x_min).to_pixels() as u32 } diff --git a/swf/src/avm1/read.rs b/swf/src/avm1/read.rs index b93d9789b..ebbad7a04 100644 --- a/swf/src/avm1/read.rs +++ b/swf/src/avm1/read.rs @@ -3,7 +3,7 @@ use crate::avm1::{opcode::OpCode, types::*}; use crate::error::{Error, Result}; use crate::read::SwfReadExt; -use crate::string::SwfStr; +use crate::string::{Encoding, SwfStr, UTF_8, WINDOWS_1252}; use byteorder::{LittleEndian, ReadBytesExt}; use std::io::{self, Read}; @@ -11,27 +11,35 @@ use std::io::{self, Read}; pub struct Reader<'a> { input: &'a [u8], version: u8, - encoding: &'static encoding_rs::Encoding, + encoding: &'static Encoding, } impl<'a> Reader<'a> { + #[inline] pub fn new(input: &'a [u8], version: u8) -> Self { Self { input, version, encoding: if version > 5 { - encoding_rs::UTF_8 + UTF_8 } else { // TODO: Allow configurable encoding - encoding_rs::WINDOWS_1252 + WINDOWS_1252 }, } } + #[inline] + pub fn encoding(&self) -> &'static Encoding { + SwfStr::encoding_for_version(self.version) + } + + #[inline] pub fn get_ref(&self) -> &'a [u8] { self.input } + #[inline] pub fn get_mut(&mut self) -> &mut &'a [u8] { &mut self.input } @@ -49,7 +57,7 @@ impl<'a> Reader<'a> { } } - pub fn read_string(&mut self) -> io::Result> { + pub fn read_string(&mut self) -> io::Result<&'a SwfStr> { let mut pos = 0; loop { let byte = *self.input.get(pos).ok_or_else(|| { @@ -63,7 +71,7 @@ impl<'a> Reader<'a> { let s = unsafe { let slice = self.input.get_unchecked(..pos); - SwfStr::from_bytes_unchecked(slice, self.encoding) + SwfStr::from_bytes(slice) }; self.input = &self.input[pos + 1..]; Ok(s) @@ -483,7 +491,6 @@ pub mod tests { #[test] fn read_define_function() { - use encoding_rs::WINDOWS_1252; // Ensure we read a function properly 
along with the function data. let action_bytes = vec![ 0x9b, 0x08, 0x00, 0x66, 0x6f, 0x6f, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x96, 0x06, 0x00, diff --git a/swf/src/avm1/types.rs b/swf/src/avm1/types.rs index 1e41c8c53..709a1b966 100644 --- a/swf/src/avm1/types.rs +++ b/swf/src/avm1/types.rs @@ -18,11 +18,11 @@ pub enum Action<'a> { CastOp, CharToAscii, CloneSprite, - ConstantPool(Vec>), + ConstantPool(Vec<&'a SwfStr>), Decrement, DefineFunction { - name: SwfStr<'a>, - params: Vec>, + name: &'a SwfStr, + params: Vec<&'a SwfStr>, actions: &'a [u8], }, DefineFunction2(Function<'a>), @@ -41,8 +41,8 @@ pub enum Action<'a> { GetProperty, GetTime, GetUrl { - url: SwfStr<'a>, - target: SwfStr<'a>, + url: &'a SwfStr, + target: &'a SwfStr, }, GetUrl2 { send_vars_method: SendVarsMethod, @@ -55,7 +55,7 @@ pub enum Action<'a> { set_playing: bool, scene_offset: u16, }, - GotoLabel(SwfStr<'a>), + GotoLabel(&'a SwfStr), Greater, If { offset: i16, @@ -91,7 +91,7 @@ pub enum Action<'a> { Return, SetMember, SetProperty, - SetTarget(SwfStr<'a>), + SetTarget(&'a SwfStr), SetTarget2, SetVariable, StackSwap, @@ -140,7 +140,7 @@ pub enum Value<'a> { Int(i32), Float(f32), Double(f64), - Str(SwfStr<'a>), + Str(&'a SwfStr), Register(u8), ConstantPool(u16), } @@ -154,7 +154,7 @@ pub enum SendVarsMethod { #[derive(Clone, Debug, PartialEq)] pub struct Function<'a> { - pub name: SwfStr<'a>, + pub name: &'a SwfStr, pub register_count: u8, pub params: Vec>, pub preload_parent: bool, @@ -171,7 +171,7 @@ pub struct Function<'a> { #[derive(Clone, Debug, PartialEq, Eq)] pub struct FunctionParam<'a> { - pub name: SwfStr<'a>, + pub name: &'a SwfStr, pub register_index: Option, } @@ -184,6 +184,6 @@ pub struct TryBlock<'a> { #[derive(Clone, Debug, PartialEq, Eq)] pub enum CatchVar<'a> { - Var(SwfStr<'a>), + Var(&'a SwfStr), Register(u8), } diff --git a/swf/src/avm1/write.rs b/swf/src/avm1/write.rs index 6c97828da..38dc7aceb 100644 --- a/swf/src/avm1/write.rs +++ b/swf/src/avm1/write.rs @@ -60,7 +60,7 @@ impl 
SwfWriteExt for Writer { } #[inline] - fn write_string(&mut self, s: SwfStr<'_>) -> io::Result<()> { + fn write_string(&mut self, s: &'_ SwfStr) -> io::Result<()> { self.output.write_all(s.as_bytes())?; self.write_u8(0) } diff --git a/swf/src/avm2/write.rs b/swf/src/avm2/write.rs index 8114134ea..38102250b 100644 --- a/swf/src/avm2/write.rs +++ b/swf/src/avm2/write.rs @@ -56,7 +56,7 @@ impl SwfWriteExt for Writer { } #[inline] - fn write_string(&mut self, s: SwfStr<'_>) -> io::Result<()> { + fn write_string(&mut self, s: &'_ SwfStr) -> io::Result<()> { self.output.write_all(s.as_bytes())?; self.write_u8(0) } diff --git a/swf/src/read.rs b/swf/src/read.rs index cf098aba1..e3a455132 100644 --- a/swf/src/read.rs +++ b/swf/src/read.rs @@ -8,7 +8,7 @@ use crate::{ error::{Error, Result}, - string::SwfStr, + string::{Encoding, SwfStr}, types::*, }; use bitstream_io::BitRead; @@ -248,28 +248,30 @@ impl<'a, 'b> BitReader<'a, 'b> { pub struct Reader<'a> { input: &'a [u8], version: u8, - encoding: &'static encoding_rs::Encoding, } impl<'a> Reader<'a> { + #[inline] pub fn new(input: &'a [u8], version: u8) -> Reader<'a> { - Reader { - input, - version, - encoding: if version > 5 { - encoding_rs::UTF_8 - } else { - // TODO: Allow configurable encoding - encoding_rs::WINDOWS_1252 - }, - } + Reader { input, version } } + /// Returns the suggested string encoding for this SWF. + /// For SWF version 6 and higher, this is always UTF-8. + /// For SWF version 5 and lower, this is locale-dependent, + /// and we default to WINDOWS-1252. + #[inline] + pub fn encoding(&self) -> &'static Encoding { + SwfStr::encoding_for_version(self.version) + } + + #[inline] pub fn version(&self) -> u8 { self.version } /// Returns a reference to the underlying `Reader`. + #[inline] pub fn get_ref(&self) -> &'a [u8] { self.input } @@ -277,6 +279,7 @@ impl<'a> Reader<'a> { /// Returns a mutable reference to the underlying `Reader`. /// /// Reading from this reference is not recommended. 
+ #[inline] pub fn get_mut(&mut self) -> &mut &'a [u8] { &mut self.input } @@ -317,7 +320,7 @@ impl<'a> Reader<'a> { slice } - pub fn read_string(&mut self) -> io::Result> { + pub fn read_string(&mut self) -> io::Result<&'a SwfStr> { let mut pos = 0; loop { let byte = *self.input.get(pos).ok_or_else(|| { @@ -329,16 +332,14 @@ impl<'a> Reader<'a> { pos += 1; } - let s = unsafe { - let slice = self.input.get_unchecked(..pos); - SwfStr::from_bytes_unchecked(slice, self.encoding) - }; + let slice = unsafe { self.input.get_unchecked(..pos) }; + let s = SwfStr::from_bytes(slice); self.input = &self.input[pos + 1..]; Ok(s) } - fn read_string_with_len(&mut self, len: usize) -> io::Result> { - Ok(SwfStr::from_bytes(&self.read_slice(len)?, self.encoding)) + fn read_string_with_len(&mut self, len: usize) -> io::Result<&'a SwfStr> { + Ok(SwfStr::from_bytes_null_terminated(&self.read_slice(len)?)) } /// Reads the next SWF tag from the stream. diff --git a/swf/src/string.rs b/swf/src/string.rs index f193c30b7..981191449 100644 --- a/swf/src/string.rs +++ b/swf/src/string.rs @@ -1,58 +1,49 @@ -//! String typed used by SWF files. -//! -//! Allows for locale-dependent encoding for SWF version <6. +//! String type used by SWF files. -use encoding_rs::{Encoding, UTF_8}; +pub use encoding_rs::{Encoding, SHIFT_JIS, UTF_8, WINDOWS_1252}; use std::{borrow::Cow, fmt}; -/// `SwfStr` is returned by SWF and AVM1 parsing functions. -/// `SwfStr` is analogous to `&str`, with some additional allowances: -/// * An encoding is specified along with the string data. -/// * The string contains no null bytes. +/// `SwfStr` is the string type returned by SWF parsing functions. +/// `SwfStr` is a bstr-like type analogous to `str`: +/// * The encoding depends on the SWF version (UTF-8 for SWF6 and higher). +/// Use `Reader::encoding` or `SwfStr::encoding_for_version` to get the +/// proper encoding. 
/// * Invalid data for any particular encoding is allowed; /// any conversions to std::String will be lossy for invalid data. -/// This handles the locale dependent encoding of early SWF files and -/// mimics C-style null-terminated string behavior. /// To convert this to a standard Rust string, use `SwfStr::to_str_lossy`. -/// `SwfStr`s are equal if both their encoding and data matches. -#[derive(Copy, Clone, Eq, PartialEq)] -pub struct SwfStr<'a> { +#[derive(Eq, PartialEq)] +#[repr(transparent)] +pub struct SwfStr { /// The string bytes. - string: &'a [u8], - - /// The encoding of the string data. - encoding: &'static Encoding, + string: [u8], } -impl<'a> SwfStr<'a> { +impl SwfStr { /// Create a new `SwfStr` from a byte slice with a given encoding. /// The string will be truncated if a null byte is encountered. /// The data is not required to be valid for the given encoding. #[inline] - pub fn from_bytes(string: &'a [u8], encoding: &'static Encoding) -> Self { - let i = string.iter().position(|&c| c == 0).unwrap_or(string.len()); - Self { - string: &string[..i], - encoding, - } + pub fn from_bytes(string: &[u8]) -> &Self { + unsafe { &*(string as *const [u8] as *const Self) } } - /// Create a new `SwfStr` from a byte slice with a given encoding. - /// The data is not required to be valid for the given encoding. - /// - /// # Safety - /// - /// The string should contain no null bytes. #[inline] - pub unsafe fn from_bytes_unchecked(string: &'a [u8], encoding: &'static Encoding) -> Self { - Self { string, encoding } + pub fn from_bytes_null_terminated(string: &[u8]) -> &Self { + let i = string.iter().position(|&c| c == 0).unwrap_or(string.len()); + Self::from_bytes(&string[..i]) + } + + /// Create a new UTF-8 `SwfStr` from a Rust `str`. + #[inline] + pub fn from_utf8_str(string: &str) -> &Self { + Self::from_bytes(string.as_bytes()) } /// Create a new UTF-8 `SwfStr` from a Rust `str`. /// The string will be truncated if a null byte is encountered. 
#[inline] - pub fn from_utf8_str(string: &'a str) -> Self { - Self::from_bytes(string.as_bytes(), UTF_8) + pub fn from_utf8_str_null_terminated(string: &str) -> &Self { + Self::from_bytes_null_terminated(string.as_bytes()) } /// Create a new `SwfStr` with the given encoding from a Rust `str`. @@ -60,24 +51,34 @@ impl<'a> SwfStr<'a> { /// The string will be truncated if a null byte is encountered. /// `None` is returned if the encoding is not lossless. /// Intended for tests. - pub fn from_str_with_encoding(string: &'a str, encoding: &'static Encoding) -> Option { + pub fn from_str_with_encoding<'a>( + string: &'a str, + encoding: &'static Encoding, + ) -> Option<&'a Self> { if let (Cow::Borrowed(s), _, false) = encoding.encode(&string) { - Some(Self::from_bytes(s, encoding)) + Some(Self::from_bytes(s)) } else { None } } - /// Returns the byte slice of this string. + /// Returns the suggested string encoding for the given SWF version. + /// For SWF version 6 and higher, this is always UTF-8. + /// For SWF version 5 and lower, this is locale-dependent, + /// and we default to WINDOWS-1252. #[inline] - pub fn as_bytes(&self) -> &'a [u8] { - self.string + pub fn encoding_for_version(swf_version: u8) -> &'static Encoding { + if swf_version >= 6 { + UTF_8 + } else { + WINDOWS_1252 + } } - /// Returns the encoding used by this string. + /// Returns the byte slice of this string. #[inline] - pub fn encoding(&self) -> &'static Encoding { - self.encoding + pub fn as_bytes(&self) -> &[u8] { + &self.string } /// Returns `true` if the string has a length of zero, and `false` otherwise. @@ -95,47 +96,40 @@ impl<'a> SwfStr<'a> { /// Decodes the string into a Rust UTF-8 `str`. /// The UTF-8 replacement character will be uses for any invalid data. 
#[inline] - pub fn to_str_lossy(&self) -> Cow<'a, str> { - self.encoding.decode_without_bom_handling(self.string).0 + pub fn to_str_lossy(&self, encoding: &'static Encoding) -> Cow<'_, str> { + encoding.decode_without_bom_handling(&self.string).0 } /// Decodes the string into a Rust UTF-8 `String`. /// The UTF-8 replacement character will be uses for any invalid data. #[inline] - pub fn to_string_lossy(&self) -> String { - self.to_str_lossy().into_owned() + pub fn to_string_lossy(&self, encoding: &'static Encoding) -> String { + self.to_str_lossy(encoding).into_owned() } } -impl<'a> Default for SwfStr<'a> { - fn default() -> Self { - Self { - string: &[], - encoding: UTF_8, - } +impl<'a> Default for &'a SwfStr { + fn default() -> &'a SwfStr { + SwfStr::from_bytes(&[]) } } -impl<'a> From<&'a str> for SwfStr<'a> { - fn from(s: &'a str) -> Self { +impl<'a> From<&'a str> for &'a SwfStr { + fn from(s: &'a str) -> &'a SwfStr { SwfStr::from_utf8_str(s) } } -impl<'a, T: AsRef> PartialEq for SwfStr<'a> { +impl<'a, T: ?Sized + AsRef> PartialEq for SwfStr { fn eq(&self, other: &T) -> bool { - self.string == other.as_ref().as_bytes() + &self.string == other.as_ref().as_bytes() } } -impl<'a> fmt::Display for SwfStr<'a> { +impl fmt::Debug for SwfStr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.to_str_lossy()) - } -} - -impl<'a> fmt::Debug for SwfStr<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.to_str_lossy()) + // Note that this assumes UTF-8 encoding; + // other encodings like Shift-JIS will output gibberish. 
+ f.write_str(&self.to_str_lossy(UTF_8)) } } diff --git a/swf/src/test_data.rs b/swf/src/test_data.rs index bab003be0..1da41d7f3 100644 --- a/swf/src/test_data.rs +++ b/swf/src/test_data.rs @@ -5,11 +5,10 @@ use crate::avm2::read::tests::read_abc_from_file; use crate::avm2::types::*; use crate::read::tests::{read_tag_bytes_from_file, read_tag_bytes_from_file_with_index}; use crate::read::{decompress_swf, parse_swf}; -use crate::string::SwfStr; +use crate::string::{SwfStr, WINDOWS_1252}; use crate::tag_code::TagCode; use crate::types::*; use crate::write::write_swf; -use encoding_rs::WINDOWS_1252; use std::fs::File; use std::vec::Vec; diff --git a/swf/src/types.rs b/swf/src/types.rs index 2a5aaa5c7..57c436798 100644 --- a/swf/src/types.rs +++ b/swf/src/types.rs @@ -247,7 +247,7 @@ pub struct FileAttributes { #[derive(Debug, PartialEq)] pub struct FrameLabel<'a> { - pub label: SwfStr<'a>, + pub label: &'a SwfStr, pub is_anchor: bool, } @@ -260,7 +260,7 @@ pub struct DefineSceneAndFrameLabelData<'a> { #[derive(Debug, PartialEq)] pub struct FrameLabelData<'a> { pub frame_num: u32, - pub label: SwfStr<'a>, + pub label: &'a SwfStr, } pub type Depth = u16; @@ -274,9 +274,9 @@ pub struct PlaceObject<'a> { pub matrix: Option, pub color_transform: Option, pub ratio: Option, - pub name: Option>, + pub name: Option<&'a SwfStr>, pub clip_depth: Option, - pub class_name: Option>, + pub class_name: Option<&'a SwfStr>, pub filters: Option>, pub background_color: Option, pub blend_mode: Option, @@ -472,7 +472,7 @@ pub enum Tag<'a> { }, ShowFrame, - Protect(Option>), + Protect(Option<&'a SwfStr>), CsmTextSettings(CsmTextSettings), DebugId(DebugId), DefineBinaryData { @@ -505,8 +505,8 @@ pub enum Tag<'a> { DefineFontInfo(Box>), DefineFontName { id: CharacterId, - name: SwfStr<'a>, - copyright_info: SwfStr<'a>, + name: &'a SwfStr, + copyright_info: &'a SwfStr, }, DefineMorphShape(Box), DefineScalingGrid { @@ -524,14 +524,14 @@ pub enum Tag<'a> { id: CharacterId, action_data: &'a [u8], 
}, - EnableDebugger(SwfStr<'a>), + EnableDebugger(&'a SwfStr), EnableTelemetry { password_hash: &'a [u8], }, End, - Metadata(SwfStr<'a>), + Metadata(&'a SwfStr), ImportAssets { - url: SwfStr<'a>, + url: &'a SwfStr, imports: Vec>, }, JpegTables(JpegTables<'a>), @@ -545,7 +545,7 @@ pub enum Tag<'a> { SoundStreamHead2(Box), StartSound(StartSound), StartSound2 { - class_name: SwfStr<'a>, + class_name: &'a SwfStr, sound_info: Box, }, SymbolClass(Vec>), @@ -570,7 +570,7 @@ pub type ExportAssets<'a> = Vec>; #[derive(Debug, PartialEq, Clone)] pub struct ExportedAsset<'a> { pub id: CharacterId, - pub name: SwfStr<'a>, + pub name: &'a SwfStr, } #[derive(Debug, PartialEq, Clone)] @@ -584,7 +584,7 @@ pub type SetBackgroundColor = Color; #[derive(Debug, PartialEq, Clone)] pub struct SymbolClassLink<'a> { pub id: CharacterId, - pub class_name: SwfStr<'a>, + pub class_name: &'a SwfStr, } #[derive(Debug, PartialEq, Clone)] @@ -897,7 +897,7 @@ pub struct FontV1 { pub struct Font<'a> { pub version: u8, pub id: CharacterId, - pub name: SwfStr<'a>, + pub name: &'a SwfStr, pub language: Language, pub layout: Option, pub glyphs: Vec, @@ -913,7 +913,7 @@ pub struct Font4<'a> { pub id: CharacterId, pub is_italic: bool, pub is_bold: bool, - pub name: SwfStr<'a>, + pub name: &'a SwfStr, pub data: Option<&'a [u8]>, } @@ -944,7 +944,7 @@ pub struct KerningRecord { pub struct FontInfo<'a> { pub id: CharacterId, pub version: u8, - pub name: SwfStr<'a>, + pub name: &'a SwfStr, pub is_small_text: bool, pub is_shift_jis: bool, pub is_ansi: bool, @@ -983,13 +983,13 @@ pub struct EditText<'a> { pub id: CharacterId, pub bounds: Rectangle, pub font_id: Option, // TODO(Herschel): Combine with height - pub font_class_name: Option>, + pub font_class_name: Option<&'a SwfStr>, pub height: Option, pub color: Option, pub max_length: Option, pub layout: Option, - pub variable_name: SwfStr<'a>, - pub initial_text: Option>, + pub variable_name: &'a SwfStr, + pub initial_text: Option<&'a SwfStr>, pub 
is_word_wrap: bool, pub is_multiline: bool, pub is_password: bool, @@ -1116,7 +1116,7 @@ pub struct DefineBitsJpeg3<'a> { #[derive(Clone, Debug, PartialEq)] pub struct DoAbc<'a> { - pub name: SwfStr<'a>, + pub name: &'a SwfStr, pub is_lazy_initialize: bool, pub data: &'a [u8], } diff --git a/swf/src/write.rs b/swf/src/write.rs index f8078a9c9..47e36ef92 100644 --- a/swf/src/write.rs +++ b/swf/src/write.rs @@ -144,7 +144,7 @@ pub trait SwfWriteExt { fn write_i32(&mut self, n: i32) -> io::Result<()>; fn write_f32(&mut self, n: f32) -> io::Result<()>; fn write_f64(&mut self, n: f64) -> io::Result<()>; - fn write_string(&mut self, s: SwfStr<'_>) -> io::Result<()>; + fn write_string(&mut self, s: &'_ SwfStr) -> io::Result<()>; } pub struct BitWriter { @@ -257,7 +257,7 @@ impl SwfWriteExt for Writer { } #[inline] - fn write_string(&mut self, s: SwfStr<'_>) -> io::Result<()> { + fn write_string(&mut self, s: &'_ SwfStr) -> io::Result<()> { self.output.write_all(s.as_bytes())?; self.write_u8(0) }