diff --git a/core/src/avm2/globals/flash/utils/byte_array.rs b/core/src/avm2/globals/flash/utils/byte_array.rs index aec135a38..56d8e448f 100644 --- a/core/src/avm2/globals/flash/utils/byte_array.rs +++ b/core/src/avm2/globals/flash/utils/byte_array.rs @@ -183,39 +183,43 @@ pub fn read_utf<'gc>( Ok(Value::Undefined) } + +pub fn strip_bom<'gc>(activation: &mut Activation<'_, 'gc>, mut bytes: &[u8]) -> AvmString<'gc> { + // UTF-8 BOM + if let Some(without_bom) = bytes.strip_prefix(&[0xEF, 0xBB, 0xBF]) { + bytes = without_bom; + // Little-endian UTF-16 BOM + } else if let Some(without_bom) = bytes.strip_prefix(&[0xFF, 0xFE]) { + let utf16_bytes: Vec<_> = without_bom + .chunks_exact(2) + .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) + .collect(); + return AvmString::new( + activation.context.gc_context, + WString::from_buf(utf16_bytes), + ); + // Big-endian UTF-16 BOM + } else if let Some(without_bom) = bytes.strip_prefix(&[0xFE, 0xFF]) { + let utf16_bytes: Vec<_> = without_bom + .chunks_exact(2) + .map(|pair| u16::from_be_bytes([pair[0], pair[1]])) + .collect(); + return AvmString::new( + activation.context.gc_context, + WString::from_buf(utf16_bytes), + ); + } + + AvmString::new_utf8_bytes(activation.context.gc_context, bytes) +} + pub fn to_string<'gc>( activation: &mut Activation<'_, 'gc>, this: Object<'gc>, _args: &[Value<'gc>], ) -> Result, Error<'gc>> { if let Some(bytearray) = this.as_bytearray() { - let mut bytes = bytearray.bytes(); - if let Some(without_bom) = bytes.strip_prefix(&[0xEF, 0xBB, 0xBF]) { - bytes = without_bom; - // Little-endian UTF-16 BOM - } else if let Some(without_bom) = bytes.strip_prefix(&[0xFF, 0xFE]) { - let utf16_bytes: Vec<_> = without_bom - .chunks_exact(2) - .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) - .collect(); - return Ok(AvmString::new( - activation.context.gc_context, - WString::from_buf(utf16_bytes), - ) - .into()); - // Big-endian UTF-16 BOM - } else if let Some(without_bom) = bytes.strip_prefix(&[0xFE, 0xFF]) { - let utf16_bytes: Vec<_> = without_bom - .chunks_exact(2) - .map(|pair| u16::from_be_bytes([pair[0], pair[1]])) - .collect(); - return Ok(AvmString::new( - activation.context.gc_context, - WString::from_buf(utf16_bytes), - ) - .into()); - } - return Ok(AvmString::new_utf8_bytes(activation.context.gc_context, bytes).into()); + return Ok(strip_bom(activation, bytearray.bytes()).into()); } Ok(Value::Undefined) diff --git a/core/src/loader.rs b/core/src/loader.rs index c42946a64..f41537846 100644 --- a/core/src/loader.rs +++ b/core/src/loader.rs @@ -5,13 +5,14 @@ use crate::avm1::{Attribute, Avm1}; use crate::avm1::{ExecutionReason, NativeObject}; use crate::avm1::{Object, SoundObject, TObject, Value}; use crate::avm2::bytearray::ByteArrayStorage; +use crate::avm2::globals::flash::utils::byte_array::strip_bom; use crate::avm2::object::{ ByteArrayObject, EventObject as Avm2EventObject, FileReferenceObject, LoaderStream, TObject as _, }; use crate::avm2::{ Activation as Avm2Activation, Avm2, BitmapDataObject, Domain as Avm2Domain, - Object as Avm2Object, Value as Avm2Value, + Object as Avm2Object, }; use crate::backend::navigator::{ErrorResponse, OwnedFuture, Request, SuccessResponse}; use crate::backend::ui::DialogResultFuture; @@ -1560,8 +1561,7 @@ impl<'gc> Loader<'gc> { if body.is_empty() { None } else { - let string_value = - AvmString::new_utf8_bytes(activation.context.gc_context, &body); + let string_value = strip_bom(activation, &body); activation .avm2() @@ -1576,10 +1576,7 @@ impl<'gc> Loader<'gc> { tracing::warn!("Invalid URLLoaderDataFormat: {}", data_format); } - let string_value = - AvmString::new_utf8_bytes(activation.context.gc_context, &body); - - Some(Avm2Value::String(string_value)) + Some(strip_bom(activation, &body).into()) }; if let Some(data_object) = data_object { diff --git a/tests/tests/swfs/avm2/bom/Test.as b/tests/tests/swfs/avm2/bom/Test.as new file mode 100755 index 000000000..8f3dc1d12 --- /dev/null +++ b/tests/tests/swfs/avm2/bom/Test.as @@ -0,0 +1,58 @@ +package { + + import flash.display.MovieClip; + import flash.utils.ByteArray; + import flash.utils.Endian; + import flash.net.URLLoader; + import flash.net.URLLoaderDataFormat; + import flash.net.URLRequest; + import flash.events.Event; + import flash.events.IOErrorEvent; + + + public class Test extends MovieClip { + + + public function Test() { + var utf8 = new ByteArray(); + var utf8Bytes = [0xef, 0xbb, 0xbf, 0x46, 0x78]; + for each (var byte in utf8Bytes) { + utf8.writeByte(byte); + } + trace("ByteArray UTF-8: " + utf8); + + var utf16le = new ByteArray(); + var utf16leBytes = [0xff, 0xfe, 0x0, 0x22, 0x78, 0x0]; + for each (var byte in utf16leBytes) { + utf16le.writeByte(byte); + } + trace("ByteArray UTF-16 Little endian: " + utf16le); + + var utf16be = new ByteArray(); + var utf16beBytes = [0xfe, 0xff, 0x22, 0x0, 0x0, 0x78]; + for each (var byte in utf16beBytes) { + utf16be.writeByte(byte); + } + trace("ByteArray UTF-16 Big endian: " + utf16be); + + var files = ["utf8", "utf16le", "utf16be", "utf8", "utf16le", "utf16be"]; + var current = files.shift(); + var urlLoader = new URLLoader(); + urlLoader.dataFormat = URLLoaderDataFormat.TEXT; + urlLoader.addEventListener(IOErrorEvent.IO_ERROR, function(event:IOErrorEvent):void { + trace("URLLoader IOError: " + event); + }); + urlLoader.addEventListener(Event.COMPLETE, function(event:Event):void { + trace("URLLoader dataFormat=" + urlLoader.dataFormat + " " + current + ": " + event.target.data); + if (files.length > 0) { + if (files.length == 3) { + urlLoader.dataFormat = URLLoaderDataFormat.VARIABLES; + } + current = files.shift(); + urlLoader.load(new URLRequest(current)); + } + }); + urlLoader.load(new URLRequest(current)); + } + } +} diff --git a/tests/tests/swfs/avm2/bom/output.txt b/tests/tests/swfs/avm2/bom/output.txt new file mode 100644 index 000000000..b314dafb0 --- /dev/null +++ b/tests/tests/swfs/avm2/bom/output.txt @@ -0,0 +1,9 @@ +ByteArray UTF-8: Fx +ByteArray UTF-16 Little endian: ∀x +ByteArray UTF-16 Big endian: ∀x +URLLoader dataFormat=text utf8: lastName=Jones&firstName=Tom +URLLoader dataFormat=text utf16le: lastName=Jo∀nes&firstName=Tom +URLLoader dataFormat=text utf16be: lastName=Jo∀nes&firstName=Tom +URLLoader dataFormat=variables utf8: firstName=Tom&lastName=Jones +URLLoader dataFormat=variables utf16le: firstName=Tom&lastName=Jo%E2%88%80nes +URLLoader dataFormat=variables utf16be: firstName=Tom&lastName=Jo%E2%88%80nes diff --git a/tests/tests/swfs/avm2/bom/test.swf b/tests/tests/swfs/avm2/bom/test.swf new file mode 100755 index 000000000..18bb6f196 Binary files /dev/null and b/tests/tests/swfs/avm2/bom/test.swf differ diff --git a/tests/tests/swfs/avm2/utf16_bom/test.toml b/tests/tests/swfs/avm2/bom/test.toml similarity index 100% rename from tests/tests/swfs/avm2/utf16_bom/test.toml rename to tests/tests/swfs/avm2/bom/test.toml diff --git a/tests/tests/swfs/avm2/bom/utf16be b/tests/tests/swfs/avm2/bom/utf16be new file mode 100644 index 000000000..a121aaad9 Binary files /dev/null and b/tests/tests/swfs/avm2/bom/utf16be differ diff --git a/tests/tests/swfs/avm2/bom/utf16le b/tests/tests/swfs/avm2/bom/utf16le new file mode 100644 index 000000000..f54b04960 Binary files /dev/null and b/tests/tests/swfs/avm2/bom/utf16le differ diff --git a/tests/tests/swfs/avm2/bom/utf8 b/tests/tests/swfs/avm2/bom/utf8 new file mode 100644 index 000000000..c7f3e3ca5 --- /dev/null +++ b/tests/tests/swfs/avm2/bom/utf8 @@ -0,0 +1 @@ +lastName=Jones&firstName=Tom \ No newline at end of file diff --git a/tests/tests/swfs/avm2/utf16_bom/Test.as b/tests/tests/swfs/avm2/utf16_bom/Test.as deleted file mode 100755 index 3c2c93f83..000000000 --- a/tests/tests/swfs/avm2/utf16_bom/Test.as +++ /dev/null @@ -1,28 +0,0 @@ -package { - - import flash.display.MovieClip; - import flash.utils.ByteArray; - import flash.utils.Endian; - - - public class Test extends MovieClip { - - - public function Test() { - var le = new ByteArray(); - var leBytes = [0xff, 0xfe, 0x0, 0x22, 0x78, 0x0]; - for each (var byte in leBytes) { - le.writeByte(byte); - } - trace("Little endian: " + le); - - var be = new ByteArray(); - var beBytes = [0xfe, 0xff, 0x22, 0x0, 0x0, 0x78]; - for each (var byte in beBytes) { - be.writeByte(byte); - } - trace("Big endian: " + be); - } - } - -} diff --git a/tests/tests/swfs/avm2/utf16_bom/output.txt b/tests/tests/swfs/avm2/utf16_bom/output.txt deleted file mode 100644 index c31ff56ed..000000000 --- a/tests/tests/swfs/avm2/utf16_bom/output.txt +++ /dev/null @@ -1,2 +0,0 @@ -Little endian: ∀x -Big endian: ∀x diff --git a/tests/tests/swfs/avm2/utf16_bom/test.fla b/tests/tests/swfs/avm2/utf16_bom/test.fla deleted file mode 100755 index 11c69d3c0..000000000 Binary files a/tests/tests/swfs/avm2/utf16_bom/test.fla and /dev/null differ diff --git a/tests/tests/swfs/avm2/utf16_bom/test.swf b/tests/tests/swfs/avm2/utf16_bom/test.swf deleted file mode 100755 index cf86a93e0..000000000 Binary files a/tests/tests/swfs/avm2/utf16_bom/test.swf and /dev/null differ