From c457fba947e0824793f6a97fb6e3964d53cbe863 Mon Sep 17 00:00:00 2001 From: Yaman Kassir Date: Sun, 15 Sep 2024 19:39:52 +0200 Subject: [PATCH] core: Strip BOM in URLLoader text data (#17513) --- .../avm2/globals/flash/utils/byte_array.rs | 58 ++++++++++-------- core/src/loader.rs | 11 ++-- tests/tests/swfs/avm2/bom/Test.as | 58 ++++++++++++++++++ tests/tests/swfs/avm2/bom/output.txt | 9 +++ tests/tests/swfs/avm2/bom/test.swf | Bin 0 -> 1451 bytes .../swfs/avm2/{utf16_bom => bom}/test.toml | 0 tests/tests/swfs/avm2/bom/utf16be | Bin 0 -> 60 bytes tests/tests/swfs/avm2/bom/utf16le | Bin 0 -> 60 bytes tests/tests/swfs/avm2/bom/utf8 | 1 + tests/tests/swfs/avm2/utf16_bom/Test.as | 28 --------- tests/tests/swfs/avm2/utf16_bom/output.txt | 2 - tests/tests/swfs/avm2/utf16_bom/test.fla | Bin 3830 -> 0 bytes tests/tests/swfs/avm2/utf16_bom/test.swf | Bin 697 -> 0 bytes 13 files changed, 103 insertions(+), 64 deletions(-) create mode 100755 tests/tests/swfs/avm2/bom/Test.as create mode 100644 tests/tests/swfs/avm2/bom/output.txt create mode 100755 tests/tests/swfs/avm2/bom/test.swf rename tests/tests/swfs/avm2/{utf16_bom => bom}/test.toml (100%) create mode 100644 tests/tests/swfs/avm2/bom/utf16be create mode 100644 tests/tests/swfs/avm2/bom/utf16le create mode 100644 tests/tests/swfs/avm2/bom/utf8 delete mode 100755 tests/tests/swfs/avm2/utf16_bom/Test.as delete mode 100644 tests/tests/swfs/avm2/utf16_bom/output.txt delete mode 100755 tests/tests/swfs/avm2/utf16_bom/test.fla delete mode 100755 tests/tests/swfs/avm2/utf16_bom/test.swf diff --git a/core/src/avm2/globals/flash/utils/byte_array.rs b/core/src/avm2/globals/flash/utils/byte_array.rs index aec135a38..56d8e448f 100644 --- a/core/src/avm2/globals/flash/utils/byte_array.rs +++ b/core/src/avm2/globals/flash/utils/byte_array.rs @@ -183,39 +183,43 @@ pub fn read_utf<'gc>( Ok(Value::Undefined) } + +pub fn strip_bom<'gc>(activation: &mut Activation<'_, 'gc>, mut bytes: &[u8]) -> AvmString<'gc> { + // UTF-8 BOM + if let Some(without_bom) = bytes.strip_prefix(&[0xEF, 0xBB, 0xBF]) { + bytes = without_bom; + // Little-endian UTF-16 BOM + } else if let Some(without_bom) = bytes.strip_prefix(&[0xFF, 0xFE]) { + let utf16_bytes: Vec<_> = without_bom + .chunks_exact(2) + .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) + .collect(); + return AvmString::new( + activation.context.gc_context, + WString::from_buf(utf16_bytes), + ); + // Big-endian UTF-16 BOM + } else if let Some(without_bom) = bytes.strip_prefix(&[0xFE, 0xFF]) { + let utf16_bytes: Vec<_> = without_bom + .chunks_exact(2) + .map(|pair| u16::from_be_bytes([pair[0], pair[1]])) + .collect(); + return AvmString::new( + activation.context.gc_context, + WString::from_buf(utf16_bytes), + ); + } + + AvmString::new_utf8_bytes(activation.context.gc_context, bytes) +} + pub fn to_string<'gc>( activation: &mut Activation<'_, 'gc>, this: Object<'gc>, _args: &[Value<'gc>], ) -> Result, Error<'gc>> { if let Some(bytearray) = this.as_bytearray() { - let mut bytes = bytearray.bytes(); - if let Some(without_bom) = bytes.strip_prefix(&[0xEF, 0xBB, 0xBF]) { - bytes = without_bom; - // Little-endian UTF-16 BOM - } else if let Some(without_bom) = bytes.strip_prefix(&[0xFF, 0xFE]) { - let utf16_bytes: Vec<_> = without_bom - .chunks_exact(2) - .map(|pair| u16::from_le_bytes([pair[0], pair[1]])) - .collect(); - return Ok(AvmString::new( - activation.context.gc_context, - WString::from_buf(utf16_bytes), - ) - .into()); - // Big-endian UTF-16 BOM - } else if let Some(without_bom) = bytes.strip_prefix(&[0xFE, 0xFF]) { - let utf16_bytes: Vec<_> = without_bom - .chunks_exact(2) - .map(|pair| u16::from_be_bytes([pair[0], pair[1]])) - .collect(); - return Ok(AvmString::new( - activation.context.gc_context, - WString::from_buf(utf16_bytes), - ) - .into()); - } - return Ok(AvmString::new_utf8_bytes(activation.context.gc_context, bytes).into()); + return Ok(strip_bom(activation, bytearray.bytes()).into()); } Ok(Value::Undefined) diff --git a/core/src/loader.rs b/core/src/loader.rs index c42946a64..f41537846 100644 --- a/core/src/loader.rs +++ b/core/src/loader.rs @@ -5,13 +5,14 @@ use crate::avm1::{Attribute, Avm1}; use crate::avm1::{ExecutionReason, NativeObject}; use crate::avm1::{Object, SoundObject, TObject, Value}; use crate::avm2::bytearray::ByteArrayStorage; +use crate::avm2::globals::flash::utils::byte_array::strip_bom; use crate::avm2::object::{ ByteArrayObject, EventObject as Avm2EventObject, FileReferenceObject, LoaderStream, TObject as _, }; use crate::avm2::{ Activation as Avm2Activation, Avm2, BitmapDataObject, Domain as Avm2Domain, - Object as Avm2Object, Value as Avm2Value, + Object as Avm2Object, }; use crate::backend::navigator::{ErrorResponse, OwnedFuture, Request, SuccessResponse}; use crate::backend::ui::DialogResultFuture; @@ -1560,8 +1561,7 @@ impl<'gc> Loader<'gc> { if body.is_empty() { None } else { - let string_value = - AvmString::new_utf8_bytes(activation.context.gc_context, &body); + let string_value = strip_bom(activation, &body); activation .avm2() @@ -1576,10 +1576,7 @@ impl<'gc> Loader<'gc> { tracing::warn!("Invalid URLLoaderDataFormat: {}", data_format); } - let string_value = - AvmString::new_utf8_bytes(activation.context.gc_context, &body); - - Some(Avm2Value::String(string_value)) + Some(strip_bom(activation, &body).into()) }; if let Some(data_object) = data_object { diff --git a/tests/tests/swfs/avm2/bom/Test.as b/tests/tests/swfs/avm2/bom/Test.as new file mode 100755 index 000000000..8f3dc1d12 --- /dev/null +++ b/tests/tests/swfs/avm2/bom/Test.as @@ -0,0 +1,58 @@ +package { + + import flash.display.MovieClip; + import flash.utils.ByteArray; + import flash.utils.Endian; + import flash.net.URLLoader; + import flash.net.URLLoaderDataFormat; + import flash.net.URLRequest; + import flash.events.Event; + import flash.events.IOErrorEvent; + + + public class Test extends MovieClip { + + + public function Test() { + var utf8 = new ByteArray(); + var utf8Bytes = [0xef, 0xbb, 0xbf, 0x46, 0x78]; + for each (var byte in utf8Bytes) { + utf8.writeByte(byte); + } + trace("ByteArray UTF-8: " + utf8); + + var utf16le = new ByteArray(); + var utf16leBytes = [0xff, 0xfe, 0x0, 0x22, 0x78, 0x0]; + for each (var byte in utf16leBytes) { + utf16le.writeByte(byte); + } + trace("ByteArray UTF-16 Little endian: " + utf16le); + + var utf16be = new ByteArray(); + var utf16beBytes = [0xfe, 0xff, 0x22, 0x0, 0x0, 0x78]; + for each (var byte in utf16beBytes) { + utf16be.writeByte(byte); + } + trace("ByteArray UTF-16 Big endian: " + utf16be); + + var files = ["utf8", "utf16le", "utf16be", "utf8", "utf16le", "utf16be"]; + var current = files.shift(); + var urlLoader = new URLLoader(); + urlLoader.dataFormat = URLLoaderDataFormat.TEXT; + urlLoader.addEventListener(IOErrorEvent.IO_ERROR, function(event:IOErrorEvent):void { + trace("URLLoader IOError: " + event); + }); + urlLoader.addEventListener(Event.COMPLETE, function(event:Event):void { + trace("URLLoader dataFormat=" + urlLoader.dataFormat + " " + current + ": " + event.target.data); + if (files.length > 0) { + if (files.length == 3) { + urlLoader.dataFormat = URLLoaderDataFormat.VARIABLES; + } + current = files.shift(); + urlLoader.load(new URLRequest(current)); + } + }); + urlLoader.load(new URLRequest(current)); + } + } +} diff --git a/tests/tests/swfs/avm2/bom/output.txt b/tests/tests/swfs/avm2/bom/output.txt new file mode 100644 index 000000000..b314dafb0 --- /dev/null +++ b/tests/tests/swfs/avm2/bom/output.txt @@ -0,0 +1,9 @@ +ByteArray UTF-8: Fx +ByteArray UTF-16 Little endian: ∀x +ByteArray UTF-16 Big endian: ∀x +URLLoader dataFormat=text utf8: lastName=Jones&firstName=Tom +URLLoader dataFormat=text utf16le: lastName=Jo∀nes&firstName=Tom +URLLoader dataFormat=text utf16be: lastName=Jo∀nes&firstName=Tom +URLLoader dataFormat=variables utf8: firstName=Tom&lastName=Jones +URLLoader dataFormat=variables utf16le: firstName=Tom&lastName=Jo%E2%88%80nes +URLLoader dataFormat=variables utf16be: firstName=Tom&lastName=Jo%E2%88%80nes diff --git a/tests/tests/swfs/avm2/bom/test.swf b/tests/tests/swfs/avm2/bom/test.swf new file mode 100755 index 0000000000000000000000000000000000000000..18bb6f1961d5679ae6b960d12008f8334b088217 GIT binary patch literal 1451 zcmV;c1yuS&S5q?i2mk!+$mTVFN4iE?qJ;|XV z2lCTRiez~VYGuilWCN?d3ah?E7kP!yo;JHKy6DL+&I^=YNe1F(SNquK>i&Fl=lkxA zj)DIRAoerBIDu+n0swwWo)LhRhsRAL(_yxwTjpxERFt!shilOlyWXhmR<|-3HJu7R zt!58hw^fm(qobqZ(L&L(52Vua^0FkCrEDsE~wHLEFMKq||na&IMnwYpYm zSawr&*VI-{wWJW3_FKtw}E(q(eGUo2n^Q zS%(=`i`kOhZZr%gxy->9gMa6!;YezyS(a+p(hH0f(dFa!u&(g2;R-h1(hX*3Otr~Y zvm2V%;GWM;X0sQ|UWrjIE_ia(+PYC+&B|G+$ExIs^BJuum+wNo)?kx6KR=Jf0iNv? zo^R~!z24caLN)#&v;uEO-wOaJIy9Mm!0Hf(B0fNs_gV+>8>^$UO+#-D`8*Zty3;b$ zld=8%18d*4_Uo*no4VKY{vk73p_b)@%1d{rF5;SN*D$raVK%#)_ocKD8^*RNf{ z)KypA^5-tpY+6Z z8E(zhJ1j_??2q?>Oa-xEdli4jUR-sxLuLy>{m2$h*>hX(s4jc9SMfNw-aEZlCun1D z0Z)*;D0%bc>L$rht!>-LAXmu z*^g~pLJT@HvCKe$!hUa;0+g-<5WI$RviRu4WgE7wo(u$)ZC5uOvF$dNyokel7GFbC zJo2dZoQcbt%*z*Bg(cJ;vr3DGA9ZN&A=95$TFh+gu4}LiGwZr)UR2dSud1=>FRC8t z2bU${<;3tVJ9epV`O_Cy>veyp+q&a26gT3Xa)Y^S_&?$d_34=SLiJQ%{Bl@?V6c)Y zF2$#Y$G9*VCux$tmQJO|(&OogbUHm5o{CJTZl{V|EG*E77#$cKiV=P|o~49PE=0*7 z4aF!)P(CS+63Wj|a$U@#aYM`z@g|{(TjCu;LvwVbKzWJsGUZER858bO{vPG;Q+|mi zD&i`kWRiv+Qf{4c8*BX0q#%ko%|{K zdhU1mBgq52djf9~-Ne%boXs~$cVzY>nf-;#z9qS>-0`b0I=LLYS4RGbCji7>f4hO6 zkdjnJyQ4eTNSO;IKAfL>+ezgYl%&@p!i@)Te0O5@oXq}1a*)gBj&txz!0K=gw6K!W z_;?Z#YkVynV2&Z?q=(6e6MqMo)A>bZtcN)hU<%y~0wOFT0)CHuOp2IP8P|jWQ>f7Z za{@859;QGqFmw53Wx~Vk-ptGSd-{5J_?%sZy_RJ`fdnf+@$rZYD z`IU8UXOcs+kY8I5`KC|v{R^6-N$-6#f}+DHgaF2H4j=_MGZTRCqH&-A1Av-EKm_ze z!5d)c7N&#Gp_eTty%(Ezv%oqU{{k?> Fj)szh*vkL_ literal 0 HcmV?d00001 diff --git a/tests/tests/swfs/avm2/utf16_bom/test.toml b/tests/tests/swfs/avm2/bom/test.toml similarity index 100% rename from tests/tests/swfs/avm2/utf16_bom/test.toml rename to tests/tests/swfs/avm2/bom/test.toml diff --git a/tests/tests/swfs/avm2/bom/utf16be b/tests/tests/swfs/avm2/bom/utf16be new file mode 100644 index 0000000000000000000000000000000000000000..a121aaad98d1981158d9c7634d1896e388eec860 GIT binary patch literal 60 zcmezOpCN}Kk)fEOguxF;=Q5-+*fMxAdguu}}p literal 0 HcmV?d00001 diff --git a/tests/tests/swfs/avm2/bom/utf16le b/tests/tests/swfs/avm2/bom/utf16le new file mode 100644 index 0000000000000000000000000000000000000000..f54b04960ec2d49aec3953bb89b68dd4a171e32e GIT binary patch literal 60 ycmezWFNYzKp_rkB!4F91GNdxtGI%lMGcYLS0olb2Y7A)%nG8i}DnfuNav1=+-we$F literal 0 HcmV?d00001 diff --git a/tests/tests/swfs/avm2/bom/utf8 b/tests/tests/swfs/avm2/bom/utf8 new file mode 100644 index 000000000..c7f3e3ca5 --- /dev/null +++ b/tests/tests/swfs/avm2/bom/utf8 @@ -0,0 +1 @@ +lastName=Jones&firstName=Tom \ No newline at end of file diff --git a/tests/tests/swfs/avm2/utf16_bom/Test.as b/tests/tests/swfs/avm2/utf16_bom/Test.as deleted file mode 100755 index 3c2c93f83..000000000 --- a/tests/tests/swfs/avm2/utf16_bom/Test.as +++ /dev/null @@ -1,28 +0,0 @@ -package { - - import flash.display.MovieClip; - import flash.utils.ByteArray; - import flash.utils.Endian; - - - public class Test extends MovieClip { - - - public function Test() { - var le = new ByteArray(); - var leBytes = [0xff, 0xfe, 0x0, 0x22, 0x78, 0x0]; - for each (var byte in leBytes) { - le.writeByte(byte); - } - trace("Little endian: " + le); - - var be = new ByteArray(); - var beBytes = [0xfe, 0xff, 0x22, 0x0, 0x0, 0x78]; - for each (var byte in beBytes) { - be.writeByte(byte); - } - trace("Big endian: " + be); - } - } - -} diff --git a/tests/tests/swfs/avm2/utf16_bom/output.txt b/tests/tests/swfs/avm2/utf16_bom/output.txt deleted file mode 100644 index c31ff56ed..000000000 --- a/tests/tests/swfs/avm2/utf16_bom/output.txt +++ /dev/null @@ -1,2 +0,0 @@ -Little endian: ∀x -Big endian: ∀x diff --git a/tests/tests/swfs/avm2/utf16_bom/test.fla b/tests/tests/swfs/avm2/utf16_bom/test.fla deleted file mode 100755 index 11c69d3c0f5aeb227790cafba264bcfc5423feef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3830 zcmbVPc{o(<8y@?ZY?(13WGUIVLD^|!CtGE&VJ1t%h^ee&AC#zwL}cH$vSiPetuQEx z$QCJ45@qRk=2Q7})%VBmnd_Nz=A8R}-)GKspX<7BV|^-W7zhLgfdc((PS@U?AaZ~} zAaVnjK(5ZN7`(ST2IcOKb#_4EopElGm)y`2C^XI{;5I>o2els2;@rx0x<({%@c;2IERa_7&l;IS8SH~NOY1Ei+{;r zK!V}51*NQF`ul}PuEmXwiAW$P;aGH%VSh<<>ruB)#vvJqJ;jpe%9f1#LcBGtr#B8?qwGk>R8L^PQcv!SL5~VBhZ2B7%r;u`hf74xNM?CM~hKok>f<)f!1;r+x|*Qohm&g+PxY@ z^2}no`W^m-npA5v8om7*Gv2Rr%bX}e)T_4gXk_d>Y$uqBd*3Y{l?-7>_n|=y*?`=(~Zx zRclP}NZYb)POE4a=5=(CicP$?(y4GxUn7ySj515eI+c1}P&{wcFWvOa(58L`=0K8Y zZeO4^Nv$T^&9^Q5?(=8@ji6a?8CNepItbiR7-jVKx1<$8Ku#%|Pl$_hfchGCZ_= z?rTS-tG0>Qsw!lR(=19c%aU1M_Iixq=^ak`pse-clQafzWS>mah!?+C-Xta<`_X=z4?qv^QD+Cd`RrcfsXF|vsy{r#To3L1{!aZ!*S_BD(FmgZDOz(5$ zV<9i38Id)|Q+_UJNTtf_5dUd`$FMYYi_z0u7rJ)sjn!U~~HK^ZX2(TD6nmgc4KSvpcuC&sEqu zS0z2|CNv)S;*)^!X1Hn=lV4l+-aEu+CEbI>J}9zshwjdXOT7&EU6I`b6Wp0+?-oxE zvCd0`Dvl@2OF-s17gqBM7+*>?zTjLdRm>_*8?0!^DfT5~j=|4=WX$Zz)7wZ8S@wUz zCmHncZuovpdp==?0Nk*td(so<*F#&mR|2>j^CMq|@o^HD7$-QI!M?7@at4xYC~4{$ zBkHr)(_WS9_6cOm9WP0RTm4~DS32V2`Q#^FPyb-@ny zLENZ9dP?AnVu~AO5Sk+(3sE1YL{>yYLWVj+#F2Ty>xTrqG4DyC$X>R?LZq+{zAfvJ z=GzYOywNdv!~2Z&Y@SVOs%b$6PtKRF_=kCj$dsn|Jt?*s$#J#6cC}Dh`JP~P>EMux z8n}Ri8T~jxBZJ}!Ti)ya25^0z!Ggf^FTIY#KYd99*FO(kGAK4=h16xpvT4LOLO4BJ z*T6*xWoZBLQ+qUZ6!O6J@UD&{VCgGaPuW7P=2WO>b8g0V>>a1OwUl}u2HoNuteqvA zQ~7Ve2IdkwR6$M9?w2hFEU_G~nBE!`*FSPLJgD#RS+TX#sxY&I(`-NM1jW&esOe(kC+YBcr`SydSa%43e+88caO=j@q-rV#0aCL{DKT#Dq|I;Rpo z!dNdF!Os&l<&wq>h?mXoZYpxDioNZr${eA$Qs#|kco0?>HLO!6G!iyc!*g}yao!!} zH0M+|XonS2*iW6kb(+Qk@L|zUjIyOmdJH3haI3wl zD52&wu??B3RVk$at7ivv?nyhcf1t~6`8tV)t)=V46q^zeUPolFnKKDj7hDrMUY*Tv z%~JY#I3gOP9K@bDA#HmYG1GX>f>A1W-J9mfIXiSyr1tgs*EuhJb=NBp#aOudD=U6fRa=2ZSM z9(CIbaTx2aa~dYYXR|}{?sE4t-k2z(um&Gf@;1b9d-5IfO2X=By&fIw^ryPWWGdz? zbNZ5;VOg6WQRcZ1k3g>C8e)+985&EC8EZVA;PwYDDGE75x)~L7v??9F zVz<@9iEUOE4X;Y2C^4CWmyHo&HtHl(*&E3{;mvY6{#e#dKCd zamR?gG|9@BuiHF{lNFDB@CJQF*kOe*E}czv6)FGb-CT9dgcGeD67N7Ko5#}jj$_OB zsHyXH&l;S7{77&B3O2!BkS$=q)4E)NOtV01yeh~)D*Ajn?)}Nj(~*2%e6(3F2vWtj z@i#ItzGaeb*e}gxCfkCI_;t0GmZ=CC0JCTuct7Aihyv~J6tPCeO+qIglw zO3=Ljw)vsYnc6F#pXwBunnS}taM8k>lommw4khPw>Xm06MTP}N8>s5sT5=;4PL(xd zDNd|CWalAO$8dyqC)Jui@UkT$wEKce(BY|%)ZrEFn|)$eP)&!%7>K zqoUt7>5B-J<0>mOHkY_dMHgXAXljJf>EL~0*Kz3`c3rZz@f$d-HZ^%>C~JgGHu$msY-g%xfjoapr9F^y^6`n7;?HhBSKp1;gP_9!d32_tyBDesLR{yS^1`rjVQBt(HuY)6nmd zgoNGFdQ-6RUGC}30ct`==fHS$#SGoe4Sl1o#)M?N>36or8w(ehJA%Jc{Im?)PX3q0 z08UuRE(2%pjK%!hG3<8P?ga~90e1l+H`!v?JG)7mdAn+2+%bUPcR)EfVH|4){M}1O+7ww39=A4-!9i0uE|G|B4m*yIDJ^?f9|%n*}(y z0lkwh{>~!ji|rz_;(-3;&W!(KGjiV8E;6ka=s%>7?Op$tKYs9m!#vQxrIDX||II?~ zkkM5C8hN*&+`a$rmU3r_1Yn6j7|j2gfh@3{>e};Xx$S=JzYA`M{|(^(L4H36_*+)p bSwNd%C%GEy)6kOdK!DG6K(R#N9tiY5#V6}N diff --git a/tests/tests/swfs/avm2/utf16_bom/test.swf b/tests/tests/swfs/avm2/utf16_bom/test.swf deleted file mode 100755 index cf86a93e084e7f3849dacafc99517bf7dd7bc1de..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 697 zcmV;q0!IBqS5qqn1ONbdoPCp9Q`0~cfOm7-r1#njC4jUVUL2B=*7hBs;)U6LK`*FLrqJoQDl*U4^XJQ8z zmjSrQeq;j&T`5owR0&A0u}9QEaDj-oY~5>@k1Vfi>u1t#r*Gj6+v--{RvT`w)v|Ge z&@gWzy_1&L(EG=gMvt_XR}YQO@!DFzDeE3%V`Ssn8I9x&Dj5SKBebJomL9QePYOKi zu1m9>xE2`)NE3vjmD+@KtE#FWbqriKJICt!-ilh$v{lvUSvIkpnW>7aHx*~6{d?U2 zu1q^OLtQuCW0P!j9HLteb{D6Eb{v9T-6U2Yhky}L1l5EHwNN&bO}Pi0L1RbT$!Z3*EB(s`KY3T!^cvwU&} ziYb|u)AAiTBWLBDoX^iOabAE(WTi+n7EdH&2(pOb5f_&d5WyJ)Wt5sn%mQNWB4!aW z#mSjs#)l0+$Vlwi0T~E62&e(22z#k*Q