From 89c9753520bbca60cd13e97365222744f2ade1b7 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 19 Dec 2019 22:19:22 -0500 Subject: [PATCH] Add rudimentary custom DOM impl on top of `quick_xml`. `quick_xml` was chosen due to its high performance and support for zero-copy use cases. However, we are not using `minidom`, which is the already-extant DOM impl that uses `quick_xml` as its parsing provider. This is because `minidom` nodes are not amenable to garbage collection. Specifically: we want to be able to construct a new `Object` variant that holds part of an XML node. However, `minidom::Element` directly owns its children, meaning that we can't hold references to it from within `Object` while also keeping those objects tied to the `'gc` lifetime. Hence, we provide a GC-exclusive DOM implementation. I ruled out solutions such as holding an entire XML tree in an `Rc` and having AVM objects that shadow them. This works for `SwfSlice` because indexing an array is cheap, but traversing a tree can get very expensive. XML is used in many places in Flash Player, so it's important that we treat it like a first-class citizen. 
--- Cargo.lock | 10 +++ core/Cargo.toml | 1 + core/src/lib.rs | 1 + core/src/xml.rs | 8 ++ core/src/xml/document.rs | 62 ++++++++++++++++ core/src/xml/tree.rs | 155 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 237 insertions(+) create mode 100644 core/src/xml.rs create mode 100644 core/src/xml/document.rs create mode 100644 core/src/xml/tree.rs diff --git a/Cargo.lock b/Cargo.lock index a743d9aac..5f6159718 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1432,6 +1432,14 @@ name = "quick-error" version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "quick-xml" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "quote" version = "0.6.13" @@ -1575,6 +1583,7 @@ dependencies = [ "minimp3 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "num_enum 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "puremp3 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "quick-xml 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "ruffle_macros 0.1.0", "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2563,6 +2572,7 @@ dependencies = [ "checksum proc-macro2 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0319972dcae462681daf4da1adeeaa066e3ebd29c69be96c6abb1259d2ee2bcc" "checksum puremp3 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f2b7efbb39e373af70c139e0611375fa6cad751fb93d528a610b55302710d883" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" +"checksum quick-xml 0.17.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"fe1e430bdcf30c9fdc25053b9c459bb1a4672af4617b6c783d7d91dc17c6bbb0" "checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" "checksum rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412" diff --git a/core/Cargo.toml b/core/Cargo.toml index e6bbdf08e..8405a263a 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -19,6 +19,7 @@ swf = { path = "../swf" } enumset = "0.4.2" smallvec = "1.1.0" num_enum = "0.4.2" +quick-xml = "0.17.2" [dependencies.jpeg-decoder] version = "0.1.18" diff --git a/core/src/lib.rs b/core/src/lib.rs index 54da5e57b..b7822b5de 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -20,6 +20,7 @@ mod prelude; pub mod shape_utils; pub mod tag_utils; mod transform; +mod xml; pub mod backend; diff --git a/core/src/xml.rs b/core/src/xml.rs new file mode 100644 index 000000000..9693dd312 --- /dev/null +++ b/core/src/xml.rs @@ -0,0 +1,8 @@ +//! Garbage-collectable XML DOM impl + +mod document; +mod tree; + +type Error = Box; + +pub use tree::XMLNode; diff --git a/core/src/xml/document.rs b/core/src/xml/document.rs new file mode 100644 index 000000000..e14271ec3 --- /dev/null +++ b/core/src/xml/document.rs @@ -0,0 +1,62 @@ +//! XML Document + +use crate::xml::Error; +use crate::xml::XMLNode; +use gc_arena::{Collect, MutationContext}; +use quick_xml::events::Event; +use quick_xml::Reader; + +/// The entirety of an XML document. +#[derive(Clone, Collect)] +#[collect(no_drop)] +pub struct XMLDocument<'gc> { + /// The root node(s) of the XML document. 
+ roots: Vec>, +} + +impl<'gc> XMLDocument<'gc> { + pub fn from_str(mc: MutationContext<'gc, '_>, data: &str) -> Result { + let mut parser = Reader::from_str(data); + let mut buf = Vec::new(); + let mut roots = Vec::new(); + let mut open_tags: Vec> = Vec::new(); + + loop { + match parser.read_event(&mut buf)? { + Event::Start(bs) => { + let child = XMLNode::from_start_event(mc, bs)?; + if let Some(node) = open_tags.last_mut() { + node.append_child(mc, child)?; + } else { + roots.push(child); + } + + open_tags.push(child); + } + Event::Empty(bs) => { + let child = XMLNode::from_start_event(mc, bs)?; + if let Some(node) = open_tags.last_mut() { + node.append_child(mc, child)?; + } else { + roots.push(child); + } + } + Event::End(_) => { + open_tags.pop(); + } + Event::Text(bt) => { + let child = XMLNode::text_from_text_event(mc, bt)?; + if let Some(node) = open_tags.last_mut() { + node.append_child(mc, child)?; + } else { + roots.push(child); + } + } + Event::Eof => break, + _ => {} + } + } + + Ok(Self { roots }) + } +} diff --git a/core/src/xml/tree.rs b/core/src/xml/tree.rs new file mode 100644 index 000000000..371d2dced --- /dev/null +++ b/core/src/xml/tree.rs @@ -0,0 +1,155 @@ +//! XML Tree structure + +use crate::xml::Error; +use gc_arena::{Collect, GcCell, MutationContext}; +use quick_xml::events::attributes::Attribute; +use quick_xml::events::{BytesStart, BytesText}; +use std::borrow::Cow; +use std::collections::BTreeMap; + +/// Represents a scoped name within XML. +/// +/// All names in XML are optionally namespaced. Each namespace is represented +/// as a string; the document contains a mapping of namespaces to URIs. +/// +/// The special namespace `xmlns` is used to map namespace strings to URIs; it +/// should not be used for user-specified namespaces. +#[derive(Clone, Collect, PartialEq, Eq, PartialOrd, Ord)] +#[collect(no_drop)] +pub struct XMLName { + /// The name of the XML namespace this name is scoped to. 
+ /// + /// Names without a namespace use the default namespace. + /// + /// Namespaces may be resolved to a URI by consulting the encapsulating + /// document. + namespace: Option, + name: String, +} + +impl XMLName { + pub fn from_bytes(bytes: &[u8]) -> Result { + Self::from_bytes_cow(Cow::Borrowed(bytes)) + } + + pub fn from_bytes_cow(bytes: Cow<[u8]>) -> Result { + let full_name = match bytes { + Cow::Borrowed(ln) => Cow::Borrowed(std::str::from_utf8(ln)?), + Cow::Owned(ln) => Cow::Owned(String::from_utf8(ln)?), + }; + + if let Some(colon_index) = full_name.find(':') { + Ok(Self { + namespace: Some(full_name[0..colon_index].to_owned()), + name: full_name[colon_index + 1..].to_owned(), + }) + } else { + Ok(Self { + namespace: None, + name: full_name.into_owned(), + }) + } + } +} + +/// Represents a node in the XML tree. +#[derive(Copy, Clone, Collect)] +#[collect(no_drop)] +pub struct XMLNode<'gc>(GcCell<'gc, XMLNodeData<'gc>>); + +#[derive(Clone, Collect)] +#[collect(no_drop)] +pub enum XMLNodeData<'gc> { + /// A text node in the XML tree. + Text { + /// The string representation of the text. + contents: String, + }, + + /// A comment node in the XML tree. + Comment { + /// The string representation of the comment. + contents: String, + }, + + /// An element node in the XML tree. + /// + /// Element nodes are non-leaf nodes: they can store additional data as + /// either attributes (for key/value pairs) or child nodes (for more + /// structured data). + Element { + /// The tag name of this element. + tag_name: XMLName, + + /// Attributes of the element. + attributes: BTreeMap, + + /// Child nodes of this element. + children: Vec>, + }, +} + +impl<'gc> XMLNode<'gc> { + /// Construct an XML node from a `quick_xml` `BytesStart` event. + /// + /// The returned node will always be an `Element`, and it must only contain + /// valid encoded UTF-8 data. (Other encoding support is planned later.) 
+ pub fn from_start_event<'a>( + mc: MutationContext<'gc, '_>, + bs: BytesStart<'a>, + ) -> Result { + let tag_name = XMLName::from_bytes_cow(bs.unescaped()?)?; + let mut attributes = BTreeMap::new(); + + for a in bs.attributes() { + let attribute = a?; + attributes.insert( + XMLName::from_bytes(attribute.key)?, + String::from_utf8(attribute.value.to_owned().to_vec())?, + ); + } + + let children = Vec::new(); + + Ok(XMLNode(GcCell::allocate( + mc, + XMLNodeData::Element { + tag_name, + attributes, + children, + }, + ))) + } + + pub fn text_from_text_event<'a>( + mc: MutationContext<'gc, '_>, + bt: BytesText<'a>, + ) -> Result { + Ok(XMLNode(GcCell::allocate( + mc, + XMLNodeData::Text { + contents: match bt.unescaped()? { + Cow::Borrowed(ln) => Cow::Borrowed(std::str::from_utf8(ln)?), + Cow::Owned(ln) => Cow::Owned(String::from_utf8(ln)?), + } + .to_owned() + .to_string(), + }, + ))) + } + + pub fn append_child( + &mut self, + mc: MutationContext<'gc, '_>, + child: XMLNode<'gc>, + ) -> Result<(), Error> { + match &mut *self.0.write(mc) { + XMLNodeData::Element { + ref mut children, .. + } => children.push(child), + _ => return Err("Not an Element".into()), + }; + + Ok(()) + } +}