From 62a13cd7a61f88be10dc4c2d5b6328b30b6d4c64 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 21 Jan 2020 23:33:47 -0500 Subject: [PATCH] Add the ability to `walk` through an XML tree. This yields nodes as `Step`s. This allows keeping track of the structure of the tree as you walk through descendents, as each element will be yielded twice: both as a `Step::In` *and* as a `Step::Out`. Non-element nodes will be yielded once as a `Step::Around`. I'm adding `walk` iteration specifically to avoid having to write certain methods recursively. Existing recursive callers of `children` should probably be updated to `walk` the tree and maintain a separate `Vec` stack. --- core/src/xml.rs | 2 + core/src/xml/iterators.rs | 143 ++++++++++++++++++++++++++++++++++++++ core/src/xml/tests.rs | 83 ++++++++++++++++++++++ core/src/xml/tree.rs | 105 +++++++++++----------------- 4 files changed, 269 insertions(+), 64 deletions(-) create mode 100644 core/src/xml/iterators.rs diff --git a/core/src/xml.rs b/core/src/xml.rs index 956bd8e7b..ad61dd42c 100644 --- a/core/src/xml.rs +++ b/core/src/xml.rs @@ -2,6 +2,7 @@ mod document; mod error; +mod iterators; mod namespace; mod tree; @@ -11,6 +12,7 @@ mod tests; pub use document::XMLDocument; pub use error::Error; pub use error::ParseError; +pub use iterators::Step; pub use namespace::XMLName; pub use tree::XMLNode; diff --git a/core/src/xml/iterators.rs b/core/src/xml/iterators.rs new file mode 100644 index 000000000..e5f1cb782 --- /dev/null +++ b/core/src/xml/iterators.rs @@ -0,0 +1,143 @@ +//! Iterator types for XML trees + +use crate::xml::XMLNode; + +/// Iterator that yields direct children of an XML node. +pub struct ChildIter<'gc> { + base: XMLNode<'gc>, + index: usize, + back_index: usize, +} + +impl<'gc> ChildIter<'gc> { + /// Construct a new `ChildIter` that lists the children of an XML node. 
+ pub fn for_node(base: XMLNode<'gc>) -> Self { + Self { + base, + index: 0, + back_index: base.children_len(), + } + } + + /// Yield the base element whose children are being read out of. + pub fn base(&self) -> XMLNode<'gc> { + self.base + } +} + +impl<'gc> Iterator for ChildIter<'gc> { + type Item = XMLNode<'gc>; + + fn next(&mut self) -> Option<Self::Item> { + if self.index < self.back_index { + let item = self.base.get_child_by_index(self.index); + self.index += 1; + + return item; + } + + None + } +} + +impl<'gc> DoubleEndedIterator for ChildIter<'gc> { + fn next_back(&mut self) -> Option<Self::Item> { + if self.index < self.back_index { + self.back_index -= 1; + let item = self.base.get_child_by_index(self.back_index); + + return item; + } + + None + } +} + +/// Indicates the current action being taken by `WalkIter` as it walks +/// throughout the tree. +#[derive(Copy, Clone)] +pub enum Step<'gc> { + /// `WalkIter` has discovered a new element and will begin to yield its + /// children's steps. + In(XMLNode<'gc>), + + /// `WalkIter` has discovered a non-element node that cannot have children. + /// + /// Note that elements will never be stepped around, even if they are + /// empty. They will be stepped in and out. + Around(XMLNode<'gc>), + + /// `WalkIter` has exhausted the children of an element, stepping out of + /// it. + Out(XMLNode<'gc>), +} + +impl<'gc> Step<'gc> { + /// Discard the information regarding how we approached a given node, and + /// just return the underlying `XMLNode`. + pub fn unwrap(self) -> XMLNode<'gc> { + match self { + Self::In(node) | Self::Around(node) | Self::Out(node) => node, + } + } + + /// Yields true if this step entered an element. + pub fn stepped_in(self) -> bool { + match self { + Self::In(_) => true, + Self::Around(_) | Self::Out(_) => false, + } + } + + /// Yields true if this step encountered a non-element node.
+ pub fn stepped_around(self) -> bool { + match self { + Self::Around(_) => true, + Self::In(_) | Self::Out(_) => false, + } + } + + /// Yields true if this step exited an element. + pub fn stepped_out(self) -> bool { + match self { + Self::Out(_) => true, + Self::Around(_) | Self::In(_) => false, + } + } +} + +/// Iterator that yields each step needed to visit all descendants of +/// an XML node. +pub struct WalkIter<'gc> { + stack: Vec<ChildIter<'gc>>, +} + +impl<'gc> WalkIter<'gc> { + /// Construct a new `WalkIter` that lists a tree out in `Step`s. + pub fn for_node(base: XMLNode<'gc>) -> Self { + Self { + stack: vec![ChildIter::for_node(base)], + } + } +} + +impl<'gc> Iterator for WalkIter<'gc> { + type Item = Step<'gc>; + + fn next(&mut self) -> Option<Self::Item> { + let last_stack_next = self.stack.last_mut().and_then(|i| i.next()); + + if last_stack_next.is_none() && self.stack.len() > 1 { + let last = self.stack.pop().unwrap(); + return Some(Step::Out(last.base())); + } + + let next_node = last_stack_next?; + if next_node.has_children() { + self.stack.push(ChildIter::for_node(next_node)); + Some(Step::In(next_node)) + } else { + Some(Step::Around(next_node)) + } + } +} diff --git a/core/src/xml/tests.rs b/core/src/xml/tests.rs index 81541628e..897f6b249 100644 --- a/core/src/xml/tests.rs +++ b/core/src/xml/tests.rs @@ -70,6 +70,89 @@ fn double_ended_children() { }) } +/// Tests walking of descendent nodes via Iterator.
+#[test] +#[allow(clippy::cognitive_complexity)] +fn walk() { + rootless_arena(|mc| { + let xml = XMLDocument::new(mc); + xml.as_node() + .replace_with_str( + mc, + "<test><test2></test2></test><test3>test</test3><test4><test5></test5></test4>", + ) + .expect("Parsed document"); + + let mut roots = xml + .as_node() + .walk() + .expect("Parsed document should be capable of having child nodes"); + + let root = roots.next().expect("Should have first root"); + assert!(root.stepped_in()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test"))); + + let root = roots.next().expect("Should have first root's child"); + assert!(root.stepped_in()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test2"))); + + let root = roots + .next() + .expect("Should have first root's child step-out"); + assert!(root.stepped_out()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test2"))); + + let root = roots.next().expect("Should have first root step-out"); + assert!(root.stepped_out()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test"))); + + let root = roots.next().expect("Should have second root"); + assert!(root.stepped_in()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test3"))); + + let root = roots + .next() + .expect("Should have second root's text node step-around"); + assert!(root.stepped_around()); + assert_eq!(root.unwrap().node_type(), xml::TEXT_NODE); + assert_eq!(root.unwrap().node_value(), Some("test".to_string())); + + let root = roots.next().expect("Should have second root"); + assert!(root.stepped_out()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test3"))); + + let root =
roots.next().expect("Should have last root"); + assert!(root.stepped_in()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test4"))); + + let root = roots.next().expect("Should have last root's child"); + assert!(root.stepped_in()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test5"))); + + let root = roots + .next() + .expect("Should have last root's child step-out"); + assert!(root.stepped_out()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test5"))); + + let root = roots.next().expect("Should have last root step-out"); + assert!(root.stepped_out()); + assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE); + assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test4"))); + + assert!(roots.next().is_none()); + }) +} + /// Tests round-trip XML writing behavior. #[test] fn round_trip_tostring() { diff --git a/core/src/xml/tree.rs b/core/src/xml/tree.rs index 2b4742924..ef9350e2c 100644 --- a/core/src/xml/tree.rs +++ b/core/src/xml/tree.rs @@ -4,7 +4,7 @@ use crate::avm1::xml_attributes_object::XMLAttributesObject; use crate::avm1::xml_object::XMLObject; use crate::avm1::{Object, TObject}; use crate::xml; -use crate::xml::{Error, XMLDocument, XMLName}; +use crate::xml::{Error, Step, XMLDocument, XMLName}; use gc_arena::{Collect, GcCell, MutationContext}; use quick_xml::events::attributes::Attribute; use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event}; @@ -763,75 +763,52 @@ impl<'gc> XMLNode<'gc> { None } + /// Retrieve a given child by index (i.e. position in the document). + pub fn get_child_by_index(self, index: usize) -> Option<XMLNode<'gc>> { + match &*self.0.read() { + XMLNodeData::Element { children, .. } | XMLNodeData::DocumentRoot { children, ..
} => { + Some(children) + } + _ => None, + } + .and_then(|children| children.get(index)) + .cloned() + } + + /// Returns if the node can yield children. + /// + /// Document roots and elements can yield children, while all other + /// node types are structurally prohibited from adopting child `XMLNode`s. + pub fn has_children(self) -> bool { + match &*self.0.read() { + XMLNodeData::Element { .. } | XMLNodeData::DocumentRoot { .. } => true, + _ => false, + } + } + /// Returns an iterator that yields child nodes. /// /// Yields None if this node cannot accept children. pub fn children(self) -> Option<impl DoubleEndedIterator<Item = XMLNode<'gc>>> { - struct ChildIter<'gc> { - base: XMLNode<'gc>, - index: usize, - back_index: usize, - }; - - impl<'gc> ChildIter<'gc> { - fn for_node(base: XMLNode<'gc>) -> Self { - Self { - base, - index: 0, - back_index: base.children_len(), - } - } - } - - impl<'gc> Iterator for ChildIter<'gc> { - type Item = XMLNode<'gc>; - - fn next(&mut self) -> Option<Self::Item> { - let read = self.base.0.read(); - let children = match &*read { - XMLNodeData::Element { children, .. } - | XMLNodeData::DocumentRoot { children, .. } => Some(children), - _ => None, - }; - - if let Some(children) = children { - if self.index < self.back_index { - let item = children.get(self.index).cloned(); - self.index += 1; - - return item; - } - } - - None - } - } - - impl<'gc> DoubleEndedIterator for ChildIter<'gc> { - fn next_back(&mut self) -> Option<Self::Item> { - let read = self.base.0.read(); - let children = match &*read { - XMLNodeData::Element { children, .. } - | XMLNodeData::DocumentRoot { children, .. } => Some(children), - _ => None, - }; - - if let Some(children) = children { - if self.index < self.back_index { - self.back_index -= 1; - let item = children.get(self.back_index).cloned(); - - return item; - } - } - - None - } - } - match &*self.0.read() { XMLNodeData::Element { .. } | XMLNodeData::DocumentRoot { ..
} => { - Some(ChildIter::for_node(self)) + Some(xml::iterators::ChildIter::for_node(self)) + } + _ => None, + } + } + + /// Returns an iterator that walks the XML tree. + /// + /// Walking is similar to using `descendents`, but the ends of parent nodes + /// are explicitly marked with `Step::Out`, while nodes that may have + /// children are marked with `Step::In`. + /// + /// Yields None if this node cannot accept children. + pub fn walk(self) -> Option<impl Iterator<Item = Step<'gc>>> { + match &*self.0.read() { + XMLNodeData::Element { .. } | XMLNodeData::DocumentRoot { .. } => { + Some(xml::iterators::WalkIter::for_node(self)) } _ => None, }