Add rudimentary custom DOM impl on top of `quick_xml`.

`quick_xml` was chosen due to its high performance and support for zero-copy use cases. However, we are not using `minidom`, which is the already-extant DOM impl that uses `quick_xml` as its parsing provider. This is because `minidom` nodes are not amenable to garbage collection.

Specifically: we want to be able to construct a new `Object` variant that holds part of an XML node. However, `minidom::Element` directly owns its children, meaning that we can't hold references to it from within `Object` while also keeping those objects bound to the `'gc` lifetime. Hence, we provide a GC-exclusive DOM implementation.

I ruled out solutions such as holding an entire XML tree in an `Rc` and having AVM objects that shadow them. This works for `SwfSlice` because indexing an array is cheap; but traversing a tree can get very expensive. XML is used in many places in Flash Player, so it's important that we treat it like a first-class citizen.
This commit is contained in:
David Wendt 2019-12-19 22:19:22 -05:00
parent 4ce67535b0
commit 89c9753520
6 changed files with 237 additions and 0 deletions

10
Cargo.lock generated
View File

@ -1432,6 +1432,14 @@ name = "quick-error"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "quick-xml"
version = "0.17.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "0.6.13"
@ -1575,6 +1583,7 @@ dependencies = [
"minimp3 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"num_enum 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"puremp3 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"ruffle_macros 0.1.0",
"smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -2563,6 +2572,7 @@ dependencies = [
"checksum proc-macro2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0319972dcae462681daf4da1adeeaa066e3ebd29c69be96c6abb1259d2ee2bcc"
"checksum puremp3 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f2b7efbb39e373af70c139e0611375fa6cad751fb93d528a610b55302710d883"
"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0"
"checksum quick-xml 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fe1e430bdcf30c9fdc25053b9c459bb1a4672af4617b6c783d7d91dc17c6bbb0"
"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
"checksum rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412"

View File

@ -19,6 +19,7 @@ swf = { path = "../swf" }
enumset = "0.4.2"
smallvec = "1.1.0"
num_enum = "0.4.2"
quick-xml = "0.17.2"
[dependencies.jpeg-decoder]
version = "0.1.18"

View File

@ -20,6 +20,7 @@ mod prelude;
pub mod shape_utils;
pub mod tag_utils;
mod transform;
mod xml;
pub mod backend;

8
core/src/xml.rs Normal file
View File

@ -0,0 +1,8 @@
//! Garbage-collectable XML DOM impl
mod document;
mod tree;
/// Error type shared by the XML module: any boxed `std::error::Error`.
type Error = Box<dyn std::error::Error>;
pub use tree::XMLNode;

62
core/src/xml/document.rs Normal file
View File

@ -0,0 +1,62 @@
//! XML Document
use crate::xml::Error;
use crate::xml::XMLNode;
use gc_arena::{Collect, MutationContext};
use quick_xml::events::Event;
use quick_xml::Reader;
/// The entirety of an XML document.
///
/// A document is stored as the list of its top-level nodes; nested content
/// is reachable through the children of those nodes.
#[derive(Clone, Collect)]
#[collect(no_drop)]
pub struct XMLDocument<'gc> {
/// The root node(s) of the XML document.
///
/// Every node parsed while no element is open is recorded here, so there
/// may be more than one root.
roots: Vec<XMLNode<'gc>>,
}
impl<'gc> XMLDocument<'gc> {
pub fn from_str(mc: MutationContext<'gc, '_>, data: &str) -> Result<Self, Error> {
let mut parser = Reader::from_str(data);
let mut buf = Vec::new();
let mut roots = Vec::new();
let mut open_tags: Vec<XMLNode<'gc>> = Vec::new();
loop {
match parser.read_event(&mut buf)? {
Event::Start(bs) => {
let child = XMLNode::from_start_event(mc, bs)?;
if let Some(node) = open_tags.last_mut() {
node.append_child(mc, child)?;
} else {
roots.push(child);
}
open_tags.push(child);
}
Event::Empty(bs) => {
let child = XMLNode::from_start_event(mc, bs)?;
if let Some(node) = open_tags.last_mut() {
node.append_child(mc, child)?;
} else {
roots.push(child);
}
}
Event::End(_) => {
open_tags.pop();
}
Event::Text(bt) => {
let child = XMLNode::text_from_text_event(mc, bt)?;
if let Some(node) = open_tags.last_mut() {
node.append_child(mc, child)?;
} else {
roots.push(child);
}
}
Event::Eof => break,
_ => {}
}
}
Ok(Self { roots })
}
}

155
core/src/xml/tree.rs Normal file
View File

@ -0,0 +1,155 @@
//! XML Tree structure
use crate::xml::Error;
use gc_arena::{Collect, GcCell, MutationContext};
use quick_xml::events::attributes::Attribute;
use quick_xml::events::{BytesStart, BytesText};
use std::borrow::Cow;
use std::collections::BTreeMap;
/// Represents a scoped name within XML.
///
/// All names in XML are optionally namespaced. Each namespace is represented
/// as a string; the document contains a mapping of namespaces to URIs.
///
/// The special namespace `xmlns` is used to map namespace strings to URIs; it
/// should not be used for user-specified namespaces.
///
/// The derived ordering compares `namespace` first and `name` second (field
/// declaration order); this ordering is what allows names to be used as
/// `BTreeMap` keys.
#[derive(Clone, Collect, PartialEq, Eq, PartialOrd, Ord)]
#[collect(no_drop)]
pub struct XMLName {
/// The name of the XML namespace this name is scoped to.
///
/// Names without a namespace use the default namespace.
///
/// Namespaces may be resolved to a URI by consulting the encapsulating
/// document.
namespace: Option<String>,
/// The local part of the name: the text after the first `:` when a
/// namespace is present, otherwise the whole name.
name: String,
}
impl XMLName {
    /// Build a name from a borrowed slice of UTF-8 bytes.
    ///
    /// Thin wrapper around [`XMLName::from_bytes_cow`].
    pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
        Self::from_bytes_cow(bytes.into())
    }

    /// Build a name from borrowed or owned UTF-8 bytes.
    ///
    /// The text before the first `:` (if any) becomes the namespace and the
    /// text after it becomes the local name; without a `:` the whole input is
    /// the local name and the namespace is `None`.
    ///
    /// # Errors
    ///
    /// Fails if `bytes` is not valid UTF-8.
    pub fn from_bytes_cow(bytes: Cow<[u8]>) -> Result<Self, Error> {
        // Decode without copying when the input is borrowed.
        let qualified: Cow<str> = match bytes {
            Cow::Borrowed(raw) => std::str::from_utf8(raw)?.into(),
            Cow::Owned(raw) => String::from_utf8(raw)?.into(),
        };

        let parsed = match qualified.find(':') {
            Some(split_at) => {
                let (prefix, rest) = qualified.split_at(split_at);
                Self {
                    namespace: Some(prefix.to_owned()),
                    // `rest` still starts with the one-byte `:` separator.
                    name: rest[1..].to_owned(),
                }
            }
            None => Self {
                namespace: None,
                name: qualified.into_owned(),
            },
        };

        Ok(parsed)
    }
}
/// Represents a node in the XML tree.
///
/// This is a `Copy` handle wrapping a `GcCell` that holds the node's
/// `XMLNodeData`.
#[derive(Copy, Clone, Collect)]
#[collect(no_drop)]
pub struct XMLNode<'gc>(GcCell<'gc, XMLNodeData<'gc>>);
/// The data stored inside an `XMLNode`, one variant per kind of node.
#[derive(Clone, Collect)]
#[collect(no_drop)]
pub enum XMLNodeData<'gc> {
/// A text node in the XML tree.
Text {
/// The string representation of the text.
contents: String,
},
/// A comment node in the XML tree.
Comment {
/// The string representation of the comment.
contents: String,
},
/// An element node in the XML tree.
///
/// Element nodes are non-leaf nodes: they can store additional data as
/// either attributes (for key/value pairs) or child nodes (for more
/// structured data).
Element {
/// The tag name of this element.
tag_name: XMLName,
/// Attributes of the element.
attributes: BTreeMap<XMLName, String>,
/// Child nodes of this element.
children: Vec<XMLNode<'gc>>,
},
}
impl<'gc> XMLNode<'gc> {
    /// Construct an XML node from a `quick_xml` `BytesStart` event.
    ///
    /// The returned node will always be an `Element`, and it must only contain
    /// valid encoded UTF-8 data. (Other encoding support is planned later.)
    ///
    /// The element starts out with no children. Attribute values are stored
    /// with their entity references decoded.
    ///
    /// # Errors
    ///
    /// Fails if an attribute is malformed, if an attribute value contains an
    /// invalid escape sequence, or if the tag name, an attribute name or an
    /// attribute value is not valid UTF-8.
    pub fn from_start_event<'a>(
        mc: MutationContext<'gc, '_>,
        bs: BytesStart<'a>,
    ) -> Result<Self, Error> {
        // Use only the name portion of the tag. The event's full content also
        // includes the raw attribute text, which must not leak into the name.
        let tag_name = XMLName::from_bytes(bs.name())?;

        let mut attributes = BTreeMap::new();
        for a in bs.attributes() {
            let attribute = a?;
            attributes.insert(
                XMLName::from_bytes(attribute.key)?,
                // Decode entities (`&amp;` etc.) so attribute values are
                // stored the same way text node contents are.
                String::from_utf8(attribute.unescaped_value()?.into_owned())?,
            );
        }

        Ok(XMLNode(GcCell::allocate(
            mc,
            XMLNodeData::Element {
                tag_name,
                attributes,
                children: Vec::new(),
            },
        )))
    }

    /// Construct a `Text` node from a `quick_xml` `BytesText` event.
    ///
    /// The event's content is unescaped before being stored.
    ///
    /// # Errors
    ///
    /// Fails if the text contains an invalid escape sequence or is not valid
    /// UTF-8.
    pub fn text_from_text_event<'a>(
        mc: MutationContext<'gc, '_>,
        bt: BytesText<'a>,
    ) -> Result<Self, Error> {
        // Validate UTF-8 and take ownership with at most one copy.
        let contents = match bt.unescaped()? {
            Cow::Borrowed(raw) => std::str::from_utf8(raw)?.to_owned(),
            Cow::Owned(raw) => String::from_utf8(raw)?,
        };

        Ok(XMLNode(GcCell::allocate(
            mc,
            XMLNodeData::Text { contents },
        )))
    }

    /// Append `child` to the end of this node's list of children.
    ///
    /// # Errors
    ///
    /// Returns `"Not an Element"` if this node is not an `Element`, since only
    /// elements can hold children.
    pub fn append_child(
        &mut self,
        mc: MutationContext<'gc, '_>,
        child: XMLNode<'gc>,
    ) -> Result<(), Error> {
        let mut data = self.0.write(mc);
        match &mut *data {
            XMLNodeData::Element { children, .. } => {
                children.push(child);
                Ok(())
            }
            _ => Err("Not an Element".into()),
        }
    }
}