From dbddefb44d51301e3e690b222dfe414659b8e9b1 Mon Sep 17 00:00:00 2001 From: relrelb Date: Sun, 13 Feb 2022 01:21:35 +0200 Subject: [PATCH] xml: Refactor `XmlNode::into_string` * Don't use `quick_xml::Writer` for formatting the XML, being much more simple. * Return `WString` instead of `String`, reducing `to_utf8_lossy()` calls except when the string needs to be escaped (attribute values and text contents). --- core/src/avm1/globals/shared_object.rs | 8 +-- core/src/avm1/globals/xml.rs | 2 +- core/src/avm1/globals/xml_node.rs | 11 +--- core/src/xml/tree.rs | 91 +++++++++++--------------- 4 files changed, 44 insertions(+), 68 deletions(-) diff --git a/core/src/avm1/globals/shared_object.rs b/core/src/avm1/globals/shared_object.rs index 5332f5cae..9a7646752 100644 --- a/core/src/avm1/globals/shared_object.rs +++ b/core/src/avm1/globals/shared_object.rs @@ -80,10 +80,10 @@ fn serialize_value<'gc>( let length = o.length(activation).unwrap(); Some(AmfValue::ECMAArray(vec![], values, length as u32)) } else if let Some(xml_node) = o.as_xml_node() { - xml_node - .into_string() - .map(|xml_string| AmfValue::XML(xml_string, true)) - .ok() + Some(AmfValue::XML( + xml_node.into_string().to_utf8_lossy().into_owned(), + true, + )) } else if let Some(date) = o.as_date_object() { date.date_time() .map(|date_time| AmfValue::Date(date_time.timestamp_millis() as f64, None)) diff --git a/core/src/avm1/globals/xml.rs b/core/src/avm1/globals/xml.rs index fc0331d69..cb54b1ca9 100644 --- a/core/src/avm1/globals/xml.rs +++ b/core/src/avm1/globals/xml.rs @@ -294,7 +294,7 @@ fn spawn_xml_fetch<'gc>( let request_options = if let Some(node) = send_object { // Send `node` as string. RequestOptions::post(Some(( - node.into_string().unwrap_or_default().into_bytes(), + node.into_string().to_utf8_lossy().into_owned().into_bytes(), "application/x-www-form-urlencoded".to_string(), ))) } else { diff --git a/core/src/avm1/globals/xml_node.rs b/core/src/avm1/globals/xml_node.rs index 0fa3e4146..971fcb8c1 100644 --- a/core/src/avm1/globals/xml_node.rs +++ b/core/src/avm1/globals/xml_node.rs @@ -211,16 +211,7 @@ fn to_string<'gc>( _args: &[Value<'gc>], ) -> Result, Error<'gc>> { if let Some(node) = this.as_xml_node() { - let result = node.into_string(); - - return Ok(AvmString::new_utf8( - activation.context.gc_context, - result.unwrap_or_else(|e| { - avm_warn!(activation, "XMLNode toString failed: {}", e); - "".to_string() - }), - ) - .into()); + return Ok(AvmString::new(activation.context.gc_context, node.into_string()).into()); } Ok("".into()) diff --git a/core/src/xml/tree.rs b/core/src/xml/tree.rs index d8b456419..7d83da937 100644 --- a/core/src/xml/tree.rs +++ b/core/src/xml/tree.rs @@ -8,13 +8,11 @@ use crate::string::{AvmString, WStr, WString}; use crate::xml; use crate::xml::Error; use gc_arena::{Collect, GcCell, MutationContext}; -use quick_xml::events::attributes::Attribute; -use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event}; -use quick_xml::Writer; +use quick_xml::escape::escape; +use quick_xml::events::{BytesStart, BytesText}; use smallvec::alloc::borrow::Cow; use std::collections::BTreeMap; use std::fmt; -use std::io::{Cursor, Write}; use std::mem::swap; /// Represents a node in the XML tree. @@ -828,80 +826,67 @@ impl<'gc> XmlNode<'gc> { } /// Convert the given node to a string of UTF-8 encoded XML. - pub fn into_string(self) -> Result { - let mut buf = Vec::new(); - let mut writer = Writer::new(Cursor::new(&mut buf)); - self.write_node_to_event_writer(&mut writer)?; - Ok(String::from_utf8(buf)?) + pub fn into_string(self) -> WString { + let mut result = WString::new(); + self.write_node_to_string(&mut result); + result } - /// Write the contents of this node, including its children, to the given writer. - fn write_node_to_event_writer(self, writer: &mut Writer) -> Result<(), Error> - where - W: Write, - { - // TODO: we convert all strings to utf8, replacing unpaired surrogates by the replacement char. + /// Write the contents of this node, including its children, to the given string. + fn write_node_to_string(self, result: &mut WString) { + // TODO: we convert some strings to utf8, replacing unpaired surrogates by the replacement char. // It is correct? let children: Vec<_> = self.children().collect(); - let children_len = children.len(); match &*self.0.read() { - XmlNodeData::DocumentRoot { .. } => Ok(()), + XmlNodeData::DocumentRoot { .. } => {} XmlNodeData::Element { tag_name, attributes, .. } => { - let mut node_name = tag_name.to_utf8_lossy(); - if children_len == 0 { - let mut n = node_name.into_owned(); - n.push(' '); - node_name = n.into(); - } - let mut bs = match node_name { - Cow::Borrowed(name) => BytesStart::borrowed_name(name.as_bytes()), - Cow::Owned(name) => BytesStart::owned_name(name), - }; + result.push_byte(b'<'); + result.push_str(tag_name); + for (key, value) in attributes { - bs.push_attribute(Attribute::from(( - key.to_utf8_lossy().as_ref(), - value.to_utf8_lossy().as_ref(), - ))); + result.push_byte(b' '); + result.push_str(key); + result.push_str(WStr::from_units(b"=\"")); + let encoded_value = value.to_utf8_lossy(); + let escaped_value = escape(encoded_value.as_bytes()); + result.push_str(WStr::from_units(&*escaped_value)); + result.push_byte(b'"'); } - if children_len > 0 { - writer.write_event(&Event::Start(bs)) + if children.is_empty() { + result.push_str(WStr::from_units(b" />")); } else { - writer.write_event(&Event::Empty(bs)) + result.push_byte(b'>'); } } - XmlNodeData::Text { contents, .. } => writer.write_event(&Event::Text( - BytesText::from_plain_str(&contents.to_utf8_lossy()), - )), - }?; + XmlNodeData::Text { contents, .. } => { + let encoded = contents.to_utf8_lossy(); + let escaped = escape(encoded.as_bytes()); + result.push_str(WStr::from_units(&*escaped)); + } + } - for child in children { - child.write_node_to_event_writer(writer)?; + for child in &children { + child.write_node_to_string(result); } match &*self.0.read() { - XmlNodeData::DocumentRoot { .. } => Ok(()), + XmlNodeData::DocumentRoot { .. } => {} XmlNodeData::Element { tag_name, .. } => { - if children_len > 0 { - let bs = match tag_name.to_utf8_lossy() { - Cow::Borrowed(name) => BytesEnd::borrowed(name.as_bytes()), - Cow::Owned(name) => BytesEnd::owned(name.into()), - }; - writer.write_event(&Event::End(bs)) - } else { - Ok(()) + if !children.is_empty() { + result.push_str(WStr::from_units(b"'); } } - XmlNodeData::Text { .. } => Ok(()), - }?; - - Ok(()) + XmlNodeData::Text { .. } => {} + } } }