core: Add support for non-UTF-8 encodings to the form loader

Flash's form loader decodes text files using the local system codepage
if System#useCodepage has been set to true. Previously, Ruffle always
(wrongly) decoded them as UTF-8, leading to incorrectly displayed
characters. This has been fixed: Ruffle now supports loading files with
an encoding other than UTF-8.
As Ruffle doesn't always have access to the system codepage, and as the
system codepage is not reliably the correct encoding anyway, the crate
chardetng has been added. It's used instead of the system codepage to
detect the encoding, and the data is then converted into UTF-8.
This commit is contained in:
Kornelius Rohrschneider 2024-06-12 13:53:52 +02:00 committed by Adrian Wielgosik
parent 837c922b55
commit 870bdae6fd
3 changed files with 26 additions and 1 deletions

12
Cargo.lock generated
View File

@ -759,6 +759,17 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
[[package]]
name = "chardetng"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
dependencies = [
"cfg-if",
"encoding_rs",
"memchr",
]
[[package]]
name = "chrono"
version = "0.4.38"
@ -4325,6 +4336,7 @@ dependencies = [
"build_playerglobal",
"bytemuck",
"byteorder",
"chardetng",
"chrono",
"clap",
"dasp",

View File

@ -67,6 +67,7 @@ num-bigint = "0.4"
unic-segment = "0.9.0"
id3 = "1.13.1"
either = "1.12.0"
chardetng = "0.1.17"
[target.'cfg(not(target_family = "wasm"))'.dependencies.futures]
workspace = true

View File

@ -30,6 +30,7 @@ use crate::string::AvmString;
use crate::tag_utils::SwfMovie;
use crate::vminterface::Instantiator;
use crate::{avm2_stub_method, avm2_stub_method_context};
use chardetng::EncodingDetector;
use encoding_rs::UTF_8;
use gc_arena::{Collect, GcCell};
use indexmap::IndexMap;
@ -1232,7 +1233,18 @@ impl<'gc> Loader<'gc> {
ActivationIdentifier::root("[Form Loader]"),
);
for (k, v) in form_urlencoded::parse(&body) {
let utf8_string;
let utf8_body = if activation.context.system.use_codepage {
let mut encoding_detector = EncodingDetector::new();
encoding_detector.feed(&body, true);
let encoding = encoding_detector.guess(None, true);
utf8_string = encoding.decode(&body).0;
utf8_string.as_bytes()
} else {
&body
};
for (k, v) in form_urlencoded::parse(utf8_body) {
let k = AvmString::new_utf8(activation.context.gc_context, k);
let v = AvmString::new_utf8(activation.context.gc_context, v);
that.set(k, v.into(), &mut activation)?;