Add the ability to `walk` through an XML tree.

This yields nodes as `Step`s, which lets you keep track of the structure of the tree as you walk through its descendants: each element is yielded twice, once as a `Step::In` *and* once as a `Step::Out`, while non-element nodes are yielded once as a `Step::Around`.

I'm adding `walk` iteration specifically to avoid having to write certain methods recursively. Existing recursive callers of `children` should probably be updated to `walk` the tree and maintain a separate `Vec` stack.
This commit is contained in:
David Wendt 2020-01-21 23:33:47 -05:00
parent 4eca2d4bdd
commit 62a13cd7a6
4 changed files with 269 additions and 64 deletions

View File

@ -2,6 +2,7 @@
mod document;
mod error;
mod iterators;
mod namespace;
mod tree;
@ -11,6 +12,7 @@ mod tests;
pub use document::XMLDocument;
pub use error::Error;
pub use error::ParseError;
pub use iterators::Step;
pub use namespace::XMLName;
pub use tree::XMLNode;

143
core/src/xml/iterators.rs Normal file
View File

@ -0,0 +1,143 @@
//! Iterator types for XML trees
use crate::xml::XMLNode;
/// Iterator that yields direct children of an XML node.
///
/// The iterator can be driven from either end: `index` is the position of
/// the next child handed out from the front, and `back_index` is one past
/// the position of the next child handed out from the back. Iteration stops
/// once the two meet.
pub struct ChildIter<'gc> {
/// The node whose children are being listed.
base: XMLNode<'gc>,
/// Position of the next child to yield from the front.
index: usize,
/// One past the position of the next child to yield from the back.
back_index: usize,
}
impl<'gc> ChildIter<'gc> {
    /// Build a `ChildIter` positioned before the first child of `base`.
    ///
    /// The number of children is sampled once, here, and used as the initial
    /// back position of the iterator.
    pub fn for_node(base: XMLNode<'gc>) -> Self {
        let back_index = base.children_len();

        Self {
            base,
            index: 0,
            back_index,
        }
    }

    /// The node whose children this iterator lists.
    pub fn base(&self) -> XMLNode<'gc> {
        self.base
    }
}
impl<'gc> Iterator for ChildIter<'gc> {
    type Item = XMLNode<'gc>;

    /// Yield the next child from the front, or `None` once the front cursor
    /// has caught up with the back cursor.
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.back_index {
            return None;
        }

        // Advance the cursor before looking the child up; the lookup itself
        // may still come back empty if that position holds no child.
        let position = self.index;
        self.index += 1;

        self.base.get_child_by_index(position)
    }
}
impl<'gc> DoubleEndedIterator for ChildIter<'gc> {
    /// Yield the next child from the back, or `None` once the back cursor
    /// has met the front cursor.
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.back_index <= self.index {
            return None;
        }

        // `back_index` points one past the next child to hand out, so step
        // it down first and then read that position.
        self.back_index -= 1;

        self.base.get_child_by_index(self.back_index)
    }
}
/// Indicates the current action being taken by `WalkIter` as it walks
/// throughout the tree.
///
/// Every variant carries the node that the step refers to.
#[derive(Copy, Clone)]
pub enum Step<'gc> {
/// `WalkIter` has discovered a new element and will begin to yield its
/// children's steps.
In(XMLNode<'gc>),
/// `WalkIter` has discovered a non-element node that cannot have children.
///
/// Note that elements will never be stepped around, even if they are
/// empty. They will be stepped in and out.
Around(XMLNode<'gc>),
/// `WalkIter` has exhausted the children of an element, stepping out of
/// it.
Out(XMLNode<'gc>),
}
impl<'gc> Step<'gc> {
    /// Strip away the direction of travel and hand back the `XMLNode` that
    /// this step refers to.
    pub fn unwrap(self) -> XMLNode<'gc> {
        match self {
            Self::In(entered) => entered,
            Self::Around(passed) => passed,
            Self::Out(exited) => exited,
        }
    }

    /// Returns `true` only for `Step::In`, i.e. when an element was entered.
    pub fn stepped_in(self) -> bool {
        match self {
            Self::In(_) => true,
            Self::Around(_) => false,
            Self::Out(_) => false,
        }
    }

    /// Returns `true` only for `Step::Around`, i.e. when a non-element node
    /// was passed by.
    pub fn stepped_around(self) -> bool {
        match self {
            Self::In(_) => false,
            Self::Around(_) => true,
            Self::Out(_) => false,
        }
    }

    /// Returns `true` only for `Step::Out`, i.e. when an element was left.
    pub fn stepped_out(self) -> bool {
        match self {
            Self::In(_) => false,
            Self::Around(_) => false,
            Self::Out(_) => true,
        }
    }
}
/// Iterator that yields each step needed to visit all descendants (direct
/// and indirect) of an XML node.
pub struct WalkIter<'gc> {
/// One child iterator per node currently being stepped through; the last
/// entry belongs to the innermost node.
stack: Vec<ChildIter<'gc>>,
}
impl<'gc> WalkIter<'gc> {
    /// Build a `WalkIter` that reports the tree below `base` as a sequence
    /// of `Step`s.
    ///
    /// The walk starts with a single stack entry listing the children of
    /// `base`.
    pub fn for_node(base: XMLNode<'gc>) -> Self {
        let root_children = ChildIter::for_node(base);

        Self {
            stack: vec![root_children],
        }
    }
}
impl<'gc> Iterator for WalkIter<'gc> {
    type Item = Step<'gc>;

    /// Produce the next step of the walk.
    ///
    /// The innermost child iterator is asked for its next node. A node that
    /// reports `has_children()` is entered (`Step::In`) and its own child
    /// iterator is pushed; any other node is passed by (`Step::Around`).
    /// When the innermost iterator runs dry it is popped and its base node
    /// is reported as `Step::Out`, except for the bottom-most entry, whose
    /// exhaustion ends the walk.
    fn next(&mut self) -> Option<Self::Item> {
        let innermost = self.stack.last_mut()?;

        match innermost.next() {
            Some(node) => {
                if node.has_children() {
                    self.stack.push(ChildIter::for_node(node));
                    Some(Step::In(node))
                } else {
                    Some(Step::Around(node))
                }
            }
            // Only nested entries are stepped out of; the bottom entry is
            // the node the walk started from and is never yielded.
            None if self.stack.len() > 1 => self
                .stack
                .pop()
                .map(|finished| Step::Out(finished.base())),
            None => None,
        }
    }
}

View File

@ -70,6 +70,89 @@ fn double_ended_children() {
})
}
/// Tests walking of descendent nodes via Iterator.
#[test]
#[allow(clippy::cognitive_complexity)]
fn walk() {
rootless_arena(|mc| {
let xml = XMLDocument::new(mc);
xml.as_node()
.replace_with_str(
mc,
"<test><test2></test2></test><test3>test</test3><test4><test5></test5></test4>",
)
.expect("Parsed document");
let mut roots = xml
.as_node()
.walk()
.expect("Parsed document should be capable of having child nodes");
let root = roots.next().expect("Should have first root");
assert!(root.stepped_in());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test")));
let root = roots.next().expect("Should have first root's child");
assert!(root.stepped_in());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test2")));
let root = roots
.next()
.expect("Should have first root's child step-out");
assert!(root.stepped_out());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test2")));
let root = roots.next().expect("Should have first root step-out");
assert!(root.stepped_out());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test")));
let root = roots.next().expect("Should have second root");
assert!(root.stepped_in());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test3")));
let root = roots
.next()
.expect("Should have second root's text node step-around");
assert!(root.stepped_around());
assert_eq!(root.unwrap().node_type(), xml::TEXT_NODE);
assert_eq!(root.unwrap().node_value(), Some("test".to_string()));
let root = roots.next().expect("Should have second root");
assert!(root.stepped_out());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test3")));
let root = roots.next().expect("Should have last root");
assert!(root.stepped_in());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test4")));
let root = roots.next().expect("Should have last root's child");
assert!(root.stepped_in());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test5")));
let root = roots
.next()
.expect("Should have last root's child step-out");
assert!(root.stepped_out());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test5")));
let root = roots.next().expect("Should have last root step-out");
assert!(root.stepped_out());
assert_eq!(root.unwrap().node_type(), xml::ELEMENT_NODE);
assert_eq!(root.unwrap().tag_name(), Some(XMLName::from_str("test4")));
assert!(roots.next().is_none());
})
}
/// Tests round-trip XML writing behavior.
#[test]
fn round_trip_tostring() {

View File

@ -4,7 +4,7 @@ use crate::avm1::xml_attributes_object::XMLAttributesObject;
use crate::avm1::xml_object::XMLObject;
use crate::avm1::{Object, TObject};
use crate::xml;
use crate::xml::{Error, XMLDocument, XMLName};
use crate::xml::{Error, Step, XMLDocument, XMLName};
use gc_arena::{Collect, GcCell, MutationContext};
use quick_xml::events::attributes::Attribute;
use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
@ -763,75 +763,52 @@ impl<'gc> XMLNode<'gc> {
None
}
/// Look up a single child by its position in this node's child list.
///
/// Returns `None` when the node is neither an element nor a document root,
/// or when `index` is past the end of the child list.
pub fn get_child_by_index(self, index: usize) -> Option<XMLNode<'gc>> {
    match &*self.0.read() {
        XMLNodeData::Element { children, .. } | XMLNodeData::DocumentRoot { children, .. } => {
            children.get(index).cloned()
        }
        _ => None,
    }
}
/// Reports whether this node is of a kind that can hold children.
///
/// Only elements and document roots qualify; every other kind of node
/// yields `false`. Note that this says nothing about whether any children
/// are currently present.
pub fn has_children(self) -> bool {
    let data = self.0.read();

    match &*data {
        XMLNodeData::Element { .. } => true,
        XMLNodeData::DocumentRoot { .. } => true,
        _ => false,
    }
}
/// Returns a double-ended iterator that yields the direct child nodes of
/// this node, addressed by their index in the child list.
///
/// Yields None if this node cannot accept children, i.e. when it is neither
/// an element nor a document root.
pub fn children(self) -> Option<impl DoubleEndedIterator<Item = XMLNode<'gc>>> {
// Function-local iterator: `index` is the next position to yield from the
// front, `back_index` is one past the next position to yield from the back.
struct ChildIter<'gc> {
base: XMLNode<'gc>,
index: usize,
back_index: usize,
};
impl<'gc> ChildIter<'gc> {
// The child count is sampled once, when the iterator is created.
fn for_node(base: XMLNode<'gc>) -> Self {
Self {
base,
index: 0,
back_index: base.children_len(),
}
}
}
impl<'gc> Iterator for ChildIter<'gc> {
type Item = XMLNode<'gc>;
fn next(&mut self) -> Option<Self::Item> {
let read = self.base.0.read();
let children = match &*read {
XMLNodeData::Element { children, .. }
| XMLNodeData::DocumentRoot { children, .. } => Some(children),
_ => None,
};
if let Some(children) = children {
// Stop once the front cursor has caught up with the back cursor.
if self.index < self.back_index {
let item = children.get(self.index).cloned();
self.index += 1;
return item;
}
}
None
}
}
impl<'gc> DoubleEndedIterator for ChildIter<'gc> {
fn next_back(&mut self) -> Option<Self::Item> {
let read = self.base.0.read();
let children = match &*read {
XMLNodeData::Element { children, .. }
| XMLNodeData::DocumentRoot { children, .. } => Some(children),
_ => None,
};
if let Some(children) = children {
// Step the back cursor down first, then read the position it lands on.
if self.index < self.back_index {
self.back_index -= 1;
let item = children.get(self.back_index).cloned();
return item;
}
}
None
}
}
match &*self.0.read() {
XMLNodeData::Element { .. } | XMLNodeData::DocumentRoot { .. } => {
// NOTE(review): two constructions appear back to back here, one using the
// local `ChildIter` above and one using `xml::iterators::ChildIter`.
// Presumably only one of them is meant to remain; confirm which is live.
Some(ChildIter::for_node(self))
Some(xml::iterators::ChildIter::for_node(self))
}
_ => None,
}
}
/// Returns an iterator that walks the XML tree.
///
/// Walking is similar to using `descendents`, but the ends of parent nodes
/// are explicitly marked with `Step::Out`, while nodes that may have
/// children are marked with `Step::In`.
///
/// Yields None if this node cannot accept children.
pub fn walk(self) -> Option<impl Iterator<Item = Step<'gc>>> {
match &*self.0.read() {
XMLNodeData::Element { .. } | XMLNodeData::DocumentRoot { .. } => {
Some(xml::iterators::WalkIter::for_node(self))
}
_ => None,
}