From c2349ae01290b3c20f0a6a140c2df39bffc43b83 Mon Sep 17 00:00:00 2001 From: Moulins Date: Fri, 1 Oct 2021 01:13:06 +0200 Subject: [PATCH] xml: use AvmString in xml code This has the nice side-effect of reducing string cloning, because we can just pass AvmStrings around instead. --- core/src/avm1/activation.rs | 11 +- core/src/avm1/globals/xml.rs | 52 ++--- core/src/avm1/object/xml_attributes_object.rs | 20 +- core/src/avm1/object/xml_idmap_object.rs | 11 +- core/src/avm1/object/xml_object.rs | 3 +- core/src/html/text_format.rs | 119 ++++++----- core/src/string/parse.rs | 6 +- core/src/xml/document.rs | 19 +- core/src/xml/namespace.rs | 115 +++++------ core/src/xml/tests.rs | 21 +- core/src/xml/tree.rs | 192 ++++++++++-------- 11 files changed, 290 insertions(+), 279 deletions(-) diff --git a/core/src/avm1/activation.rs b/core/src/avm1/activation.rs index 2904c281a..523500027 100644 --- a/core/src/avm1/activation.rs +++ b/core/src/avm1/activation.rs @@ -1928,7 +1928,11 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { } else { avm_warn!(self, "SetTarget failed: {} not found", target); // TODO: Emulate AVM1 trace error message. - let path = if base_clip.removed() { None } else { Some(base_clip.path()) }; + let path = if base_clip.removed() { + None + } else { + Some(base_clip.path()) + }; let message = format!( "Target not found: Target=\"{}\" Base=\"{}\"", target, @@ -2550,10 +2554,7 @@ impl<'a, 'gc, 'gc_context> Activation<'a, 'gc, 'gc_context> { path = path.trim_start_matches(b':'); let prefix = path.slice(..path.len().min(3)); - let val = if prefix == b".." - || prefix == b"../" - || prefix == b"..:" - { + let val = if prefix == b".." || prefix == b"../" || prefix == b"..:" { // Check for .. // SWF-4 style _parent if path.try_get(2) == Some(u16::from(b'/')) { diff --git a/core/src/avm1/globals/xml.rs b/core/src/avm1/globals/xml.rs index 10c9a6fe3..604fff4a8 100644 --- a/core/src/avm1/globals/xml.rs +++ b/core/src/avm1/globals/xml.rs @@ -7,7 +7,7 @@ use crate::avm1::property_decl::{define_properties_on, Declaration}; use crate::avm1::{ArrayObject, Object, TObject, Value}; use crate::avm_warn; use crate::backend::navigator::RequestOptions; -use crate::string::AvmString; +use crate::string::{AvmString, BorrowWStr}; use crate::xml; use crate::xml::{XmlDocument, XmlNode}; use gc_arena::MutationContext; @@ -93,13 +93,13 @@ pub fn xmlnode_constructor<'gc>( ) { (Some(Ok(1)), Some(Ok(ref strval)), Some(ref mut this_node)) => { let mut xmlelement = - XmlNode::new_element(activation.context.gc_context, strval, blank_document); + XmlNode::new_element(activation.context.gc_context, *strval, blank_document); xmlelement.introduce_script_object(activation.context.gc_context, this); this_node.swap(activation.context.gc_context, xmlelement); } (Some(Ok(3)), Some(Ok(ref strval)), Some(ref mut this_node)) => { let mut xmlelement = - XmlNode::new_text(activation.context.gc_context, strval, blank_document); + XmlNode::new_text(activation.context.gc_context, *strval, blank_document); xmlelement.introduce_script_object(activation.context.gc_context, this); this_node.swap(activation.context.gc_context, xmlelement); } @@ -200,8 +200,10 @@ pub fn xmlnode_get_namespace_for_prefix<'gc>( this.as_xml_node(), args.get(0).map(|v| v.coerce_to_string(activation)), ) { - if let Some(uri) = xmlnode.lookup_uri_for_namespace(&prefix_string?) { - Ok(AvmString::new(activation.context.gc_context, uri).into()) + if let Some(uri) = + xmlnode.lookup_uri_for_namespace(activation.context.gc_context, prefix_string?.borrow()) + { + Ok(uri.into()) } else { Ok(Value::Null) } @@ -219,8 +221,8 @@ pub fn xmlnode_get_prefix_for_namespace<'gc>( this.as_xml_node(), args.get(0).map(|v| v.coerce_to_string(activation)), ) { - if let Some(prefix) = xmlnode.lookup_namespace_for_uri(&uri_string?) { - Ok(AvmString::new(activation.context.gc_context, prefix).into()) + if let Some(prefix) = xmlnode.lookup_namespace_for_uri(uri_string?.borrow()) { + Ok(AvmString::new_ucs2(activation.context.gc_context, prefix).into()) } else { Ok(Value::Null) } @@ -322,14 +324,14 @@ pub fn xmlnode_node_type<'gc>( } pub fn xmlnode_node_value<'gc>( - activation: &mut Activation<'_, 'gc, '_>, + _activation: &mut Activation<'_, 'gc, '_>, this: Object<'gc>, _args: &[Value<'gc>], ) -> Result, Error<'gc>> { Ok(this .as_xml_node() .and_then(|n| n.node_value()) - .map(|n| AvmString::new(activation.context.gc_context, n).into()) + .map(|v| v.into()) .unwrap_or_else(|| Value::Null)) } @@ -342,13 +344,10 @@ pub fn xmlnode_prefix<'gc>( .as_xml_node() .and_then(|n| n.tag_name()) .map(|n| { - AvmString::new( - activation.context.gc_context, - n.prefix() - .map(|n| n.to_string()) - .unwrap_or_else(|| "".to_string()), - ) - .into() + n.prefix() + .map(|n| AvmString::new_ucs2(activation.context.gc_context, n.into())) + .unwrap_or_default() + .into() }) .unwrap_or_else(|| Value::Null)) } @@ -540,12 +539,13 @@ pub fn xmlnode_namespace_uri<'gc>( ) -> Result, Error<'gc>> { if let Some(node) = this.as_xml_node() { if let Some(name) = node.tag_name() { - return Ok(AvmString::new( - activation.context.gc_context, - node.lookup_uri_for_namespace(name.prefix().unwrap_or("")) - .unwrap_or_else(|| "".to_string()), - ) - .into()); + return Ok(node + .lookup_uri_for_namespace( + activation.context.gc_context, + name.prefix().unwrap_or_default(), + ) + .unwrap_or_default() + .into()); } return Ok(Value::Null); @@ -587,7 +587,7 @@ pub fn xml_constructor<'gc>( if let Err(e) = this_node.replace_with_str( activation.context.gc_context, - string, + string.borrow(), true, ignore_whitespace, ) { @@ -626,7 +626,7 @@ pub fn xml_create_element<'gc>( .get(0) .map(|v| v.coerce_to_string(activation).unwrap_or_default()) .unwrap_or_default(); - let mut xml_node = XmlNode::new_element(activation.context.gc_context, &nodename, document); + let mut xml_node = XmlNode::new_element(activation.context.gc_context, nodename, document); let object = XmlObject::from_xml_node( activation.context.gc_context, xml_node, @@ -653,7 +653,7 @@ pub fn xml_create_text_node<'gc>( .get(0) .map(|v| v.coerce_to_string(activation).unwrap_or_default()) .unwrap_or_default(); - let mut xml_node = XmlNode::new_text(activation.context.gc_context, &text_node, document); + let mut xml_node = XmlNode::new_text(activation.context.gc_context, text_node, document); let object = XmlObject::from_xml_node( activation.context.gc_context, xml_node, @@ -696,7 +696,7 @@ pub fn xml_parse_xml<'gc>( let result = node.replace_with_str( activation.context.gc_context, - &xmlstring, + xmlstring.borrow(), true, ignore_whitespace, ); diff --git a/core/src/avm1/object/xml_attributes_object.rs b/core/src/avm1/object/xml_attributes_object.rs index f4cbb2b82..c4a4c87c6 100644 --- a/core/src/avm1/object/xml_attributes_object.rs +++ b/core/src/avm1/object/xml_attributes_object.rs @@ -59,11 +59,11 @@ impl<'gc> TObject<'gc> for XmlAttributesObject<'gc> { fn get_local_stored( &self, name: impl Into>, - activation: &mut Activation<'_, 'gc, '_>, + _activation: &mut Activation<'_, 'gc, '_>, ) -> Option> { self.node() - .attribute_value(&XmlName::from_str(&name.into())) - .map(|s| AvmString::new(activation.context.gc_context, s).into()) + .attribute_value(XmlName::from_str(name)) + .map(|s| s.into()) } fn set_local( @@ -75,8 +75,8 @@ impl<'gc> TObject<'gc> for XmlAttributesObject<'gc> { ) -> Result<(), Error<'gc>> { self.node().set_attribute_value( activation.context.gc_context, - &XmlName::from_str(&name), - &value.coerce_to_string(activation)?, + XmlName::from_str(name), + value.coerce_to_string(activation)?, ); Ok(()) } @@ -119,7 +119,7 @@ impl<'gc> TObject<'gc> for XmlAttributesObject<'gc> { fn delete(&self, activation: &mut Activation<'_, 'gc, '_>, name: AvmString<'gc>) -> bool { self.node() - .delete_attribute(activation.context.gc_context, &XmlName::from_str(&name)); + .delete_attribute(activation.context.gc_context, XmlName::from_str(name)); self.base().delete(activation, name) } @@ -207,7 +207,7 @@ impl<'gc> TObject<'gc> for XmlAttributesObject<'gc> { name: AvmString<'gc>, ) -> bool { self.node() - .attribute_value(&XmlName::from_str(&name)) + .attribute_value(XmlName::from_str(name)) .is_some() } @@ -229,11 +229,7 @@ impl<'gc> TObject<'gc> for XmlAttributesObject<'gc> { fn get_keys(&self, activation: &mut Activation<'_, 'gc, '_>) -> Vec> { let mut base = self.base().get_keys(activation); - let attrs = self - .node() - .attribute_keys() - .into_iter() - .map(|a| AvmString::new(activation.context.gc_context, a)); + let attrs = self.node().attribute_keys().into_iter(); base.extend(attrs); base } diff --git a/core/src/avm1/object/xml_idmap_object.rs b/core/src/avm1/object/xml_idmap_object.rs index 071e3aac8..fd4774806 100644 --- a/core/src/avm1/object/xml_idmap_object.rs +++ b/core/src/avm1/object/xml_idmap_object.rs @@ -61,7 +61,7 @@ impl<'gc> TObject<'gc> for XmlIdMapObject<'gc> { activation: &mut Activation<'_, 'gc, '_>, ) -> Option> { let name = name.into(); - if let Some(mut node) = self.document().get_node_by_id(&name) { + if let Some(mut node) = self.document().get_node_by_id(name) { Some( node.script_object( activation.context.gc_context, @@ -207,7 +207,7 @@ impl<'gc> TObject<'gc> for XmlIdMapObject<'gc> { activation: &mut Activation<'_, 'gc, '_>, name: AvmString<'gc>, ) -> bool { - self.document().get_node_by_id(&name).is_some() + self.document().get_node_by_id(name).is_some() || self.base().has_own_property(activation, name) } @@ -229,12 +229,7 @@ impl<'gc> TObject<'gc> for XmlIdMapObject<'gc> { fn get_keys(&self, activation: &mut Activation<'_, 'gc, '_>) -> Vec> { let mut keys = self.base().get_keys(activation); - keys.extend( - self.document() - .get_node_ids() - .into_iter() - .map(|n| AvmString::new(activation.context.gc_context, n)), - ); + keys.extend(self.document().get_node_ids().into_iter()); keys } diff --git a/core/src/avm1/object/xml_object.rs b/core/src/avm1/object/xml_object.rs index 5ca1413d9..eda2e82d4 100644 --- a/core/src/avm1/object/xml_object.rs +++ b/core/src/avm1/object/xml_object.rs @@ -5,6 +5,7 @@ use crate::avm1::error::Error; use crate::avm1::object::TObject; use crate::avm1::{Object, ScriptObject}; use crate::impl_custom_object; +use crate::string::AvmString; use crate::xml::{XmlDocument, XmlNode}; use gc_arena::{Collect, GcCell, MutationContext}; use std::fmt; @@ -28,7 +29,7 @@ impl<'gc> XmlObject<'gc> { proto: Option>, ) -> Object<'gc> { let empty_document = XmlDocument::new(gc_context); - let mut xml_node = XmlNode::new_text(gc_context, "", empty_document); + let mut xml_node = XmlNode::new_text(gc_context, AvmString::default(), empty_document); let base_object = ScriptObject::object(gc_context, proto); let object = XmlObject(GcCell::allocate( gc_context, diff --git a/core/src/html/text_format.rs b/core/src/html/text_format.rs index 17ba3fb9b..1e5dcfd7f 100644 --- a/core/src/html/text_format.rs +++ b/core/src/html/text_format.rs @@ -2,6 +2,7 @@ use crate::context::UpdateContext; use crate::html::iterators::TextSpanIter; +use crate::string::AvmString; use crate::tag_utils::SwfMovie; use crate::xml::{XmlDocument, XmlName, XmlNode}; use gc_arena::{Collect, MutationContext}; @@ -1112,58 +1113,59 @@ impl FormatSpans { || ls.tab_stops != span.tab_stops || last_text_format_element.is_none() { - let new_tf = XmlNode::new_element(mc, "TEXTFORMAT", document); + let new_tf = XmlNode::new_element(mc, "TEXTFORMAT".into(), document); if ls.left_margin != 0.0 { new_tf.set_attribute_value( mc, - &XmlName::from_str("LEFTMARGIN"), - &format!("{}", span.left_margin), + XmlName::from_str("LEFTMARGIN"), + AvmString::new(mc, span.left_margin.to_string()), ); } if ls.right_margin != 0.0 { new_tf.set_attribute_value( mc, - &XmlName::from_str("RIGHTMARGIN"), - &format!("{}", span.right_margin), + XmlName::from_str("RIGHTMARGIN"), + AvmString::new(mc, span.right_margin.to_string()), ); } if ls.indent != 0.0 { new_tf.set_attribute_value( mc, - &XmlName::from_str("INDENT"), - &format!("{}", span.indent), + XmlName::from_str("INDENT"), + AvmString::new(mc, span.indent.to_string()), ); } if ls.block_indent != 0.0 { new_tf.set_attribute_value( mc, - &XmlName::from_str("BLOCKINDENT"), - &format!("{}", span.block_indent), + XmlName::from_str("BLOCKINDENT"), + AvmString::new(mc, span.block_indent.to_string()), ); } if ls.leading != 0.0 { new_tf.set_attribute_value( mc, - &XmlName::from_str("LEADING"), - &format!("{}", span.leading), + XmlName::from_str("LEADING"), + AvmString::new(mc, span.leading.to_string()), ); } if !ls.tab_stops.is_empty() { + let tab_stops = span + .tab_stops + .iter() + .map(|s| format!("{}", s)) + .collect::>() + .join(","); new_tf.set_attribute_value( mc, - &XmlName::from_str("TABSTOPS"), - &span - .tab_stops - .iter() - .map(|s| format!("{}", s)) - .collect::>() - .join(","), + XmlName::from_str("TABSTOPS"), + AvmString::new(mc, tab_stops), ); } @@ -1184,7 +1186,7 @@ impl FormatSpans { if can_span_create_bullets && span.bullet || !can_span_create_bullets && last_span.map(|ls| ls.bullet).unwrap_or(false) { - let new_li = XmlNode::new_element(mc, "LI", document); + let new_li = XmlNode::new_element(mc, "LI".into(), document); last_bullet = Some(new_li); last_paragraph = None; @@ -1201,18 +1203,15 @@ impl FormatSpans { } if ls.align != span.align || last_paragraph.is_none() { - let new_p = XmlNode::new_element(mc, "P", document); + let new_p = XmlNode::new_element(mc, "P".into(), document); + let align: &str = match span.align { + swf::TextAlign::Left => "LEFT", + swf::TextAlign::Center => "CENTER", + swf::TextAlign::Right => "RIGHT", + swf::TextAlign::Justify => "JUSTIFY", + }; - new_p.set_attribute_value( - mc, - &XmlName::from_str("ALIGN"), - match span.align { - swf::TextAlign::Left => "LEFT", - swf::TextAlign::Center => "CENTER", - swf::TextAlign::Right => "RIGHT", - swf::TextAlign::Justify => "JUSTIFY", - }, - ); + new_p.set_attribute_value(mc, XmlName::from_str("ALIGN"), align.into()); last_bullet .or(last_text_format_element) @@ -1234,44 +1233,50 @@ impl FormatSpans { || ls.kerning != span.kerning || last_font.is_none() { - let new_font = XmlNode::new_element(mc, "FONT", document); + let new_font = XmlNode::new_element(mc, "FONT".into(), document); if ls.font != span.font || last_font.is_none() { - new_font.set_attribute_value(mc, &XmlName::from_str("FACE"), &span.font); + new_font.set_attribute_value( + mc, + XmlName::from_str("FACE"), + // TODO(moulins): remove this alloc + AvmString::new(mc, span.font.clone()), + ); } if ls.size != span.size || last_font.is_none() { new_font.set_attribute_value( mc, - &XmlName::from_str("SIZE"), - &format!("{}", span.size), + XmlName::from_str("SIZE"), + AvmString::new(mc, span.size.to_string()), ); } if ls.color != span.color || last_font.is_none() { + let color = format!( + "#{:0>2X}{:0>2X}{:0>2X}", + span.color.r, span.color.g, span.color.b + ); new_font.set_attribute_value( mc, - &XmlName::from_str("COLOR"), - &format!( - "#{:0>2X}{:0>2X}{:0>2X}", - span.color.r, span.color.g, span.color.b - ), + XmlName::from_str("COLOR"), + AvmString::new(mc, color), ); } if ls.letter_spacing != span.letter_spacing || last_font.is_none() { new_font.set_attribute_value( mc, - &XmlName::from_str("LETTERSPACING"), - &format!("{}", span.letter_spacing), + XmlName::from_str("LETTERSPACING"), + AvmString::new(mc, span.letter_spacing.to_string()), ); } if ls.kerning != span.kerning || last_font.is_none() { new_font.set_attribute_value( mc, - &XmlName::from_str("KERNING"), - if span.kerning { "1" } else { "0" }, + XmlName::from_str("KERNING"), + if span.kerning { "1".into() } else { "0".into() }, ); } @@ -1291,12 +1296,22 @@ impl FormatSpans { } if !span.url.is_empty() && (ls.url != span.url || last_a.is_none()) { - let new_a = XmlNode::new_element(mc, "A", document); + let new_a = XmlNode::new_element(mc, "A".into(), document); - new_a.set_attribute_value(mc, &XmlName::from_str("HREF"), &span.url); + new_a.set_attribute_value( + mc, + XmlName::from_str("HREF"), + // TODO(moulins): avoid this alloc + AvmString::new(mc, span.url.clone()), + ); if !span.target.is_empty() { - new_a.set_attribute_value(mc, &XmlName::from_str("TARGET"), &span.target); + new_a.set_attribute_value( + mc, + XmlName::from_str("TARGET"), + // TODO(moulins): avoid this alloc + AvmString::new(mc, span.target.clone()), + ); } last_font @@ -1318,7 +1333,7 @@ impl FormatSpans { } if span.bold && last_b.is_none() { - let new_b = XmlNode::new_element(mc, "B", document); + let new_b = XmlNode::new_element(mc, "B".into(), document); last_a .or(last_font) @@ -1339,7 +1354,7 @@ impl FormatSpans { } if span.italic && last_i.is_none() { - let new_i = XmlNode::new_element(mc, "I", document); + let new_i = XmlNode::new_element(mc, "I".into(), document); last_b .or(last_a) @@ -1359,7 +1374,7 @@ impl FormatSpans { } if span.underline && last_u.is_none() { - let new_u = XmlNode::new_element(mc, "U", document); + let new_u = XmlNode::new_element(mc, "U".into(), document); last_i .or(last_b) @@ -1378,7 +1393,8 @@ impl FormatSpans { } let span_text = if last_bullet.is_some() { - XmlNode::new_text(mc, line, document) + // TODO(moulins): remove this UTF8 conversion + XmlNode::new_text(mc, AvmString::new(mc, line), document) } else { let line_start = line.as_ptr() as usize - text.as_ptr() as usize; let line_with_newline = if line_start > 0 { @@ -1388,7 +1404,8 @@ impl FormatSpans { line }; - XmlNode::new_text(mc, line_with_newline, document) + // TODO(moulins): remove this UTF8 conversion + XmlNode::new_text(mc, AvmString::new(mc, line_with_newline), document) }; last_u diff --git a/core/src/string/parse.rs b/core/src/string/parse.rs index df3114947..79084c4ef 100644 --- a/core/src/string/parse.rs +++ b/core/src/string/parse.rs @@ -101,7 +101,7 @@ macro_rules! impl_int_parse { )* } } -impl_int_parse! { u32 i32 usize } +impl_int_parse! { u8 u32 i32 usize } macro_rules! impl_wrapping_int_parse { ($($ty:ty)*) => { $( @@ -132,7 +132,7 @@ macro_rules! impl_wrapping_int_parse { )* } } -impl_wrapping_int_parse! { u32 i32 usize } +impl_wrapping_int_parse! { u8 u32 i32 usize } macro_rules! impl_from_str_int { ($($ty:ty)*) => { $( @@ -168,4 +168,4 @@ macro_rules! impl_from_str_int { )* } } -impl_from_str_int! { u32 i32 usize } +impl_from_str_int! { u8 u32 i32 usize } diff --git a/core/src/xml/document.rs b/core/src/xml/document.rs index 0c129a97a..4e4a945f1 100644 --- a/core/src/xml/document.rs +++ b/core/src/xml/document.rs @@ -2,6 +2,7 @@ use crate::avm1::object::xml_idmap_object::XmlIdMapObject; use crate::avm1::Object; +use crate::string::AvmString; use crate::xml::{Error, ParseError, XmlName, XmlNode}; use gc_arena::{Collect, GcCell, MutationContext}; use quick_xml::events::{BytesDecl, Event}; @@ -25,13 +26,13 @@ pub struct XmlDocumentData<'gc> { has_xmldecl: bool, /// The XML version string, if set. - version: String, + version: String, // TODO(moulins)? /// The XML document encoding, if set. - encoding: Option, + encoding: Option, // TODO(moulins)? /// The XML standalone flag, if set. - standalone: Option, + standalone: Option, // TODO(moulins)? /// The XML doctype, if set. doctype: Option>, @@ -41,7 +42,7 @@ pub struct XmlDocumentData<'gc> { /// When nodes are parsed into the document by way of `parseXML` or the /// document constructor, they get put into this list here, which is used /// to populate the document's `idMap`. - idmap: BTreeMap>, + idmap: BTreeMap, XmlNode<'gc>>, /// The script object associated with this XML node, if any. idmap_script_object: Option>, @@ -214,7 +215,7 @@ impl<'gc> XmlDocument<'gc> { /// Update the idmap object with a given new node. pub fn update_idmap(&mut self, mc: MutationContext<'gc, '_>, node: XmlNode<'gc>) { - if let Some(id) = node.attribute_value(&XmlName::from_str("id")) { + if let Some(id) = node.attribute_value(XmlName::from_str("id")) { self.0.write(mc).idmap.insert(id, node); } } @@ -225,16 +226,16 @@ impl<'gc> XmlDocument<'gc> { /// parsing*. Nodes which obtained the `id` after the fact, or nodes with /// the `id` that were added to the document after the fact, will not be /// returned by this function. - pub fn get_node_by_id(self, id: &str) -> Option> { - self.0.read().idmap.get(id).copied() + pub fn get_node_by_id(self, id: AvmString<'gc>) -> Option> { + self.0.read().idmap.get(&id).copied() } /// Retrieve all IDs currently present in the idmap. - pub fn get_node_ids(self) -> HashSet { + pub fn get_node_ids(self) -> HashSet> { let mut result = HashSet::new(); for key in self.0.read().idmap.keys() { - result.insert(key.to_string()); + result.insert(*key); } result diff --git a/core/src/xml/namespace.rs b/core/src/xml/namespace.rs index 2365df0d9..0b4efa601 100644 --- a/core/src/xml/namespace.rs +++ b/core/src/xml/namespace.rs @@ -1,8 +1,7 @@ //! XML namespacing support -use crate::xml::Error; -use gc_arena::Collect; -use std::borrow::Cow; +use crate::string::{AvmString, BorrowWStr, WStr, WString}; +use gc_arena::{Collect, MutationContext}; use std::fmt; /// Represents a scoped name within XML. @@ -10,102 +9,80 @@ use std::fmt; /// All names in XML are optionally namespaced. Each namespace is represented /// as a string; the document contains a mapping of namespaces to URIs. /// +/// Names without a namespace use the default namespace. +/// /// The special namespace `xmlns` is used to map namespace strings to URIs; it /// should not be used for user-specified namespaces. -#[derive(Clone, Collect, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Copy, Clone, Collect, PartialEq, Eq, PartialOrd, Ord)] #[collect(no_drop)] -pub struct XmlName { - /// The name of the XML namespace this name is scoped to. - /// - /// Names without a namespace use the default namespace. - /// - /// Namespaces may be resolved to a URI by consulting the encapsulating - /// document. - namespace: Option, - name: String, +pub struct XmlName<'gc> { + /// The position of the namespace separator in the name, if the name is namespaced. + namespace_sep: Option, + name: AvmString<'gc>, } -impl XmlName { - /// Construct an XML name from its parts (name and namespace). - pub fn from_parts(namespace: Option<&str>, name: &str) -> Self { - XmlName { - namespace: namespace.map(|s| s.to_string()), - name: name.to_string(), +impl<'gc> XmlName<'gc> { + pub fn in_namespace( + gc_context: MutationContext<'gc, '_>, + namespace: WStr<'_>, + name: WStr<'_>, + ) -> Self { + let mut full_name = WString::from(namespace); + full_name.push_byte(b':'); + full_name.push_str(name); + Self { + namespace_sep: Some(namespace.len()), + name: AvmString::new_ucs2(gc_context, full_name), } } - pub fn from_bytes(bytes: &[u8]) -> Result { - Self::from_bytes_cow(Cow::Borrowed(bytes)) + pub fn in_default_namespace(name: AvmString<'gc>) -> Self { + Self { + namespace_sep: None, + name, + } } - pub fn from_str(strval: &str) -> Self { - Self::from_str_cow(Cow::Borrowed(strval)) - } - - pub fn from_bytes_cow(bytes: Cow<[u8]>) -> Result { - let full_name = match bytes { - Cow::Borrowed(ln) => Cow::Borrowed(std::str::from_utf8(ln)?), - Cow::Owned(ln) => Cow::Owned(String::from_utf8(ln)?), - }; - - Ok(Self::from_str_cow(full_name)) - } - - pub fn from_str_cow(full_name: Cow) -> Self { - if let Some(colon_index) = full_name.find(':') { - Self { - namespace: Some(full_name[0..colon_index].to_owned()), - name: full_name[colon_index + 1..].to_owned(), - } - } else { - Self { - namespace: None, - name: full_name.into_owned(), - } + pub fn from_str(full_name: impl Into>) -> Self { + let full_name = full_name.into(); + Self { + namespace_sep: full_name.find(b':'), + name: full_name, } } /// Retrieve the local part of this name. - pub fn local_name(&self) -> &str { - &self.name + pub fn local_name(&self) -> WStr<'_> { + match self.namespace_sep { + Some(sep) => self.name.slice(sep + 1..), + None => self.name.borrow(), + } } /// Retrieve the prefix part of this name, if available. - pub fn prefix(&self) -> Option<&str> { - self.namespace.as_deref() + pub fn prefix(&self) -> Option> { + self.namespace_sep.map(|sep| self.name.slice(..sep)) } /// Return the fully qualified part of the name. /// /// This consists of the namespace, if present, plus a colon and local name. - pub fn node_name(&self) -> Cow { - if let Some(ref ns) = self.namespace { - Cow::Owned(format!("{}:{}", ns, self.name)) - } else { - Cow::Borrowed(&self.name) - } + pub fn node_name(&self) -> AvmString<'gc> { + self.name } - /// Compares both names as case-insensitve ASCII (for use in HTML parsing). + /// Compares both names as case-insensitve (for use in HTML parsing). /// TODO: We shouldn't need this when we have a proper HTML parser. - pub fn eq_ignore_ascii_case(&self, other: &XmlName) -> bool { - if !self.name.eq_ignore_ascii_case(&other.name) { - return false; - } - - match (&self.namespace, &other.namespace) { - (None, None) => true, - (Some(a), Some(b)) => a.eq_ignore_ascii_case(b), - _ => false, - } + pub fn eq_ignore_case(&self, other: XmlName<'gc>) -> bool { + self.name.eq_ignore_case(other.name.borrow()) } } -impl fmt::Debug for XmlName { +impl<'gc> fmt::Debug for XmlName<'gc> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("XmlName") - .field("namespace", &self.namespace) - .field("name", &self.name) + .field("namespace", &self.prefix()) + .field("name", &self.local_name()) .finish() } } diff --git a/core/src/xml/tests.rs b/core/src/xml/tests.rs index 456218819..4aba6325c 100644 --- a/core/src/xml/tests.rs +++ b/core/src/xml/tests.rs @@ -1,5 +1,6 @@ //! XML tests +use crate::string::WStr; use crate::xml; use crate::xml::{XmlDocument, XmlName}; use gc_arena::rootless_arena; @@ -10,7 +11,7 @@ fn parse_single_element() { rootless_arena(|mc| { let xml = XmlDocument::new(mc); xml.as_node() - .replace_with_str(mc, "", true, false) + .replace_with_str(mc, WStr::from_units(b""), true, false) .expect("Parsed document"); let mut roots = xml.as_node().children(); @@ -33,7 +34,9 @@ fn double_ended_children() { xml.as_node() .replace_with_str( mc, - "", + WStr::from_units( + b"", + ), true, false, ) @@ -69,12 +72,12 @@ fn double_ended_children() { /// Tests round-trip XML writing behavior. #[test] fn round_trip_tostring() { - let test_string = "This is a text node"; + let test_string = b"This is a text node"; rootless_arena(|mc| { let xml = XmlDocument::new(mc); xml.as_node() - .replace_with_str(mc, test_string, true, false) + .replace_with_str(mc, WStr::from_units(test_string), true, false) .expect("Parsed document"); let result = xml @@ -82,19 +85,19 @@ fn round_trip_tostring() { .into_string(&mut |_| true) .expect("Successful toString"); - assert_eq!(test_string, result); + assert_eq!(std::str::from_utf8(test_string).unwrap(), result); }) } /// Tests filtered XML writing behavior. #[test] fn round_trip_filtered_tostring() { - let test_string = "This is a text node"; + let test_string = b"This is a text node"; rootless_arena(|mc| { let xml = XmlDocument::new(mc); xml.as_node() - .replace_with_str(mc, test_string, true, false) + .replace_with_str(mc, WStr::from_units(test_string), true, false) .expect("Parsed document"); let result = xml @@ -114,7 +117,7 @@ fn ignore_white() { xml.as_node() .replace_with_str( mc, - " foo ", + WStr::from_units(b" foo "), true, true, ) @@ -136,7 +139,7 @@ fn ignore_white() { node = node.children().next().expect("Should have text"); assert_eq!(node.node_type(), xml::TEXT_NODE); - assert_eq!(node.node_value(), Some(" foo ".to_string())); + assert_eq!(node.node_value(), Some(" foo ".into())); assert!(root.next().is_none()); }) diff --git a/core/src/xml/tree.rs b/core/src/xml/tree.rs index 7ab1ee7c2..f613920f6 100644 --- a/core/src/xml/tree.rs +++ b/core/src/xml/tree.rs @@ -3,6 +3,7 @@ use crate::avm1::object::xml_attributes_object::XmlAttributesObject; use crate::avm1::object::xml_object::XmlObject; use crate::avm1::{Object, TObject}; +use crate::string::{AvmString, WStr, WString}; use crate::xml; use crate::xml::{Error, XmlDocument, XmlName}; use gc_arena::{Collect, GcCell, MutationContext}; @@ -63,10 +64,10 @@ pub enum XmlNodeData<'gc> { next_sibling: Option>, /// The tag name of this element. - tag_name: XmlName, + tag_name: XmlName<'gc>, /// Attributes of the element. - attributes: BTreeMap, + attributes: BTreeMap, AvmString<'gc>>, /// Child nodes of this element. children: Vec>, @@ -93,7 +94,7 @@ pub enum XmlNodeData<'gc> { next_sibling: Option>, /// The string representation of the text. - contents: String, + contents: AvmString<'gc>, }, /// A comment node in the XML tree. @@ -114,7 +115,7 @@ pub enum XmlNodeData<'gc> { next_sibling: Option>, /// The string representation of the comment. - contents: String, + contents: AvmString<'gc>, }, /// A DOCTYPE node in the XML tree. @@ -135,7 +136,7 @@ pub enum XmlNodeData<'gc> { next_sibling: Option>, /// The string representation of the DOCTYPE. - contents: String, + contents: AvmString<'gc>, }, } @@ -143,7 +144,7 @@ impl<'gc> XmlNode<'gc> { /// Construct a new XML text node. pub fn new_text( mc: MutationContext<'gc, '_>, - contents: &str, + contents: AvmString<'gc>, document: XmlDocument<'gc>, ) -> Self { XmlNode(GcCell::allocate( @@ -155,7 +156,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - contents: contents.to_string(), + contents, }, )) } @@ -163,7 +164,7 @@ impl<'gc> XmlNode<'gc> { /// Construct a new XML element node. pub fn new_element( mc: MutationContext<'gc, '_>, - element_name: &str, + element_name: AvmString<'gc>, document: XmlDocument<'gc>, ) -> Self { XmlNode(GcCell::allocate( @@ -225,11 +226,12 @@ impl<'gc> XmlNode<'gc> { pub fn replace_with_str( &mut self, mc: MutationContext<'gc, '_>, - data: &str, + data: WStr<'_>, process_entity: bool, ignore_white: bool, ) -> Result<(), Error> { - let mut parser = Reader::from_str(data); + let data_utf8 = data.to_utf8_lossy(); + let mut parser = Reader::from_str(&data_utf8); let mut buf = Vec::new(); let document = self.document(); let mut open_tags: Vec> = Vec::new(); @@ -258,7 +260,7 @@ impl<'gc> XmlNode<'gc> { } Event::Text(bt) | Event::CData(bt) => { let child = XmlNode::text_from_text_event(mc, bt, document, process_entity)?; - if child.node_value().as_deref() != Some("") + if child.node_value() != Some(AvmString::default()) && (!ignore_white || !child.is_whitespace_text()) { self.add_child_to_tree(mc, &mut open_tags, child)?; @@ -266,13 +268,13 @@ impl<'gc> XmlNode<'gc> { } Event::Comment(bt) => { let child = XmlNode::comment_from_text_event(mc, bt, document)?; - if child.node_value().as_deref() != Some("") { + if child.node_value() != Some(AvmString::default()) { self.add_child_to_tree(mc, &mut open_tags, child)?; } } Event::DocType(bt) => { let child = XmlNode::doctype_from_text_event(mc, bt, document)?; - if child.node_value().as_deref() != Some("") { + if child.node_value() != Some(AvmString::default()) { self.add_child_to_tree(mc, &mut open_tags, child)?; } } @@ -294,17 +296,24 @@ impl<'gc> XmlNode<'gc> { document: XmlDocument<'gc>, process_entity: bool, ) -> Result { - let tag_name = XmlName::from_bytes(bs.name())?; + let tag_name = std::str::from_utf8(bs.name())?; + let tag_name = XmlName::from_str(AvmString::new(mc, tag_name)); let mut attributes = BTreeMap::new(); for a in bs.attributes() { let attribute = a?; - let value = if process_entity { - String::from_utf8(attribute.unescaped_value()?.to_owned().to_vec())? + let value_bytes = if process_entity { + attribute.unescaped_value()? } else { - String::from_utf8(attribute.value.to_owned().to_vec())? + attribute.value }; - attributes.insert(XmlName::from_bytes(attribute.key)?, value); + + let value = match value_bytes { + Cow::Owned(v) => AvmString::new(mc, String::from_utf8(v)?), + Cow::Borrowed(v) => AvmString::new(mc, std::str::from_utf8(v)?), + }; + let attr_key = std::str::from_utf8(attribute.key)?; + attributes.insert(XmlName::from_str(AvmString::new(mc, attr_key)), value); } Ok(XmlNode(GcCell::allocate( @@ -348,7 +357,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - contents, + contents: AvmString::new(mc, contents), }, ))) } @@ -370,7 +379,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - contents: String::from_utf8(bt.unescaped()?.into_owned())?, + contents: AvmString::new(mc, String::from_utf8(bt.unescaped()?.into_owned())?), }, ))) } @@ -392,7 +401,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - contents: String::from_utf8(bt.unescaped()?.into_owned())?, + contents: AvmString::new(mc, String::from_utf8(bt.unescaped()?.into_owned())?), }, ))) } @@ -732,19 +741,19 @@ impl<'gc> XmlNode<'gc> { } /// Returns the tagname, if the element has one. - pub fn tag_name(self) -> Option { + pub fn tag_name(self) -> Option> { match &*self.0.read() { - XmlNodeData::Element { ref tag_name, .. } => Some(tag_name.clone()), + XmlNodeData::Element { ref tag_name, .. } => Some(*tag_name), _ => None, } } /// Returns the string contents of the node, if the element has them. - pub fn node_value(self) -> Option { + pub fn node_value(self) -> Option> { match &*self.0.read() { - XmlNodeData::Text { ref contents, .. } => Some(contents.clone()), - XmlNodeData::Comment { ref contents, .. } => Some(contents.clone()), - XmlNodeData::DocType { ref contents, .. } => Some(contents.clone()), + XmlNodeData::Text { ref contents, .. } => Some(*contents), + XmlNodeData::Comment { ref contents, .. } => Some(*contents), + XmlNodeData::DocType { ref contents, .. } => Some(*contents), _ => None, } } @@ -921,8 +930,8 @@ impl<'gc> XmlNode<'gc> { // Check if this XML node is constitutes text and only contains whitespace. pub fn is_whitespace_text(self) -> bool { - const WHITESPACE_CHARS: &[u8] = &[b' ', b'\t', b'\r', b'\n']; - matches!(&*self.0.read(), XmlNodeData::Text { contents, .. } if contents.bytes().all(|c| WHITESPACE_CHARS.contains(&c))) + const WHITESPACE_CHARS: &[u16] = &[b' ' as u16, b'\t' as u16, b'\r' as u16, b'\n' as u16]; + matches!(&*self.0.read(), XmlNodeData::Text { contents, .. } if contents.iter().all(|c| WHITESPACE_CHARS.contains(&c))) } /// Check if this XML node constitutes text. @@ -961,7 +970,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - tag_name: tag_name.clone(), + tag_name: *tag_name, attributes: attributes.clone(), attributes_script_object: None, children: Vec::new(), @@ -973,7 +982,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - contents: contents.to_string(), + contents: *contents, }, XmlNodeData::Comment { contents, .. } => XmlNodeData::Comment { script_object: None, @@ -981,7 +990,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - contents: contents.to_string(), + contents: *contents, }, XmlNodeData::DocType { contents, .. } => XmlNodeData::DocType { script_object: None, @@ -989,7 +998,7 @@ impl<'gc> XmlNode<'gc> { parent: None, prev_sibling: None, next_sibling: None, - contents: contents.to_string(), + contents: *contents, }, }, )); @@ -1011,20 +1020,19 @@ impl<'gc> XmlNode<'gc> { /// /// If the node does not contain attributes, then this function always /// yields None. - pub fn attribute_value(self, name: &XmlName) -> Option { + pub fn attribute_value(self, name: XmlName<'gc>) -> Option> { match &*self.0.read() { - XmlNodeData::Element { attributes, .. } => attributes.get(name).cloned(), + XmlNodeData::Element { attributes, .. } => attributes.get(&name).copied(), _ => None, } } /// Retrieve all keys defined on this node. - pub fn attribute_keys(self) -> Vec { + pub fn attribute_keys(self) -> Vec> { match &*self.0.read() { - XmlNodeData::Element { attributes, .. } => attributes - .keys() - .map(|v| v.node_name().to_string()) - .collect::>(), + XmlNodeData::Element { attributes, .. } => { + attributes.keys().map(|v| v.node_name()).collect::>() + } _ => Vec::new(), } } @@ -1032,12 +1040,12 @@ impl<'gc> XmlNode<'gc> { /// Retrieve the value of a single attribute on this node, case-insensitively. /// /// TODO: Probably won't need this when we have a proper HTML parser. - pub fn attribute_value_ignore_ascii_case(self, name: &XmlName) -> Option { + pub fn attribute_value_ignore_case(self, name: XmlName<'gc>) -> Option> { match &*self.0.read() { XmlNodeData::Element { attributes, .. } => attributes .iter() - .find(|(k, _)| k.eq_ignore_ascii_case(name)) - .map(|(_, v)| v.clone()), + .find(|(k, _)| k.eq_ignore_case(name)) + .map(|(_, v)| *v), _ => None, } } @@ -1048,20 +1056,20 @@ impl<'gc> XmlNode<'gc> { pub fn set_attribute_value( self, gc_context: MutationContext<'gc, '_>, - name: &XmlName, - value: &str, + name: XmlName<'gc>, + value: AvmString<'gc>, ) { if let XmlNodeData::Element { attributes, .. } = &mut *self.0.write(gc_context) { - attributes.insert(name.clone(), value.to_string()); + attributes.insert(name, value); } } /// Delete the value of a single attribute on this node. /// /// If the node does not contain attributes, then this function silently fails. - pub fn delete_attribute(self, gc_context: MutationContext<'gc, '_>, name: &XmlName) { + pub fn delete_attribute(self, gc_context: MutationContext<'gc, '_>, name: XmlName<'gc>) { if let XmlNodeData::Element { attributes, .. } = &mut *self.0.write(gc_context) { - attributes.remove(name); + attributes.remove(&name); } } @@ -1069,22 +1077,25 @@ impl<'gc> XmlNode<'gc> { /// /// XML namespaces are determined by `xmlns:` namespace attributes on the /// current node, or its parent. - pub fn lookup_uri_for_namespace(self, namespace: &str) -> Option { - let xmlns_default = XmlName::from_parts(None, "xmlns"); - let xmlns_ns = XmlName::from_parts(Some("xmlns"), namespace); - + pub fn lookup_uri_for_namespace( + self, + gc_context: MutationContext<'gc, '_>, + namespace: WStr<'_>, + ) -> Option> { if namespace.is_empty() { - if let Some(url) = self.attribute_value(&xmlns_default) { + let xmlns_default = XmlName::in_default_namespace("xmlns".into()); + if let Some(url) = self.attribute_value(xmlns_default) { return Some(url); } } - if let Some(url) = self.attribute_value(&xmlns_ns) { + let xmlns_ns = XmlName::in_namespace(gc_context, WStr::from_units(b"xmlns"), namespace); + if let Some(url) = self.attribute_value(xmlns_ns) { return Some(url); } if let Some(parent) = self.parent() { - parent.lookup_uri_for_namespace(namespace) + parent.lookup_uri_for_namespace(gc_context, namespace) } else { None } @@ -1099,16 +1110,20 @@ impl<'gc> XmlNode<'gc> { /// `within_namespace`. If it is set to `None`, then any namespace's /// attributes may satisfy the search. It is it set to `""`, then /// the default namespace will be searched. - pub fn value_attribute(self, value: &str, within_namespace: Option<&str>) -> Option { + pub fn value_attribute( + self, + value: WStr<'_>, + within_namespace: Option>, + ) -> Option> { match &*self.0.read() { XmlNodeData::Element { attributes, .. } => { for (attr, attr_value) in attributes.iter() { if let Some(namespace) = within_namespace { - if attr.prefix().unwrap_or("") == namespace && value == attr_value { - return Some(attr.clone()); + if attr.prefix().unwrap_or_default() == namespace && &value == attr_value { + return Some(*attr); } - } else if value == attr_value { - return Some(attr.clone()); + } else if &value == attr_value { + return Some(*attr); } } @@ -1125,9 +1140,9 @@ impl<'gc> XmlNode<'gc> { /// /// If there are multiple namespaces that match the URI, the first /// mentioned on the closest node will be returned. - pub fn lookup_namespace_for_uri(self, uri: &str) -> Option { - if let Some(xname) = self.value_attribute(uri, Some("xmlns")) { - Some(xname.local_name().to_string()) + pub fn lookup_namespace_for_uri(self, uri: WStr<'_>) -> Option { + if let Some(xname) = self.value_attribute(uri, Some(WStr::from_units(b"xmlns"))) { + Some(xname.local_name().into()) } else if let Some(parent) = self.parent() { parent.lookup_namespace_for_uri(uri) } else { @@ -1167,6 +1182,9 @@ impl<'gc> XmlNode<'gc> { W: Write, F: FnMut(&XmlNode<'gc>) -> bool, { + // TODO: we convert all strings to utf8, replacing unpaired surrogates by the replacement char. + // It is correct? + let children: Vec<_> = self.children().filter(|child| filter(child)).collect(); let children_len = children.len(); @@ -1177,24 +1195,24 @@ impl<'gc> XmlNode<'gc> { attributes, .. } => { - let mut bs = if children_len > 0 { - match tag_name.node_name() { - Cow::Borrowed(name) => BytesStart::borrowed_name(name.as_bytes()), - Cow::Owned(name) => BytesStart::owned_name(name), - } - } else { - BytesStart::owned_name(format!("{} ", tag_name.node_name())) + let node_name = tag_name.node_name(); + let mut node_name = node_name.to_utf8_lossy(); + if children_len == 0 { + let mut n = node_name.into_owned(); + n.push(' '); + node_name = n.into(); + } + let mut bs = match node_name { + Cow::Borrowed(name) => BytesStart::borrowed_name(name.as_bytes()), + Cow::Owned(name) => BytesStart::owned_name(name), }; - let key_values: Vec<(Cow, &str)> = attributes - .iter() - .map(|(name, value)| (name.node_name(), value.as_str())) - .collect(); - - bs.extend_attributes( - key_values - .iter() - .map(|(name, value)| Attribute::from((name.as_ref(), *value))), - ); + for (key, value) in attributes { + let name = key.node_name(); + bs.push_attribute(Attribute::from(( + name.to_utf8_lossy().as_ref(), + value.to_utf8_lossy().as_ref(), + ))); + } if children_len > 0 { writer.write_event(&Event::Start(bs)) @@ -1202,14 +1220,14 @@ impl<'gc> XmlNode<'gc> { writer.write_event(&Event::Empty(bs)) } } - XmlNodeData::Text { contents, .. } => { - writer.write_event(&Event::Text(BytesText::from_plain_str(contents.as_str()))) - } + XmlNodeData::Text { contents, .. } => writer.write_event(&Event::Text( + BytesText::from_plain_str(&contents.to_utf8_lossy()), + )), XmlNodeData::Comment { contents, .. } => writer.write_event(&Event::Comment( - BytesText::from_plain_str(contents.as_str()), + BytesText::from_plain_str(&contents.to_utf8_lossy()), )), XmlNodeData::DocType { contents, .. } => writer.write_event(&Event::DocType( - BytesText::from_plain_str(contents.as_str()), + BytesText::from_plain_str(&contents.to_utf8_lossy()), )), }?; @@ -1221,7 +1239,9 @@ impl<'gc> XmlNode<'gc> { XmlNodeData::DocumentRoot { .. } => Ok(()), XmlNodeData::Element { tag_name, .. } => { if children_len > 0 { - let bs = match tag_name.node_name() { + let node_name = tag_name.node_name(); + + let bs = match node_name.to_utf8_lossy() { Cow::Borrowed(name) => BytesEnd::borrowed(name.as_bytes()), Cow::Owned(name) => BytesEnd::owned(name.into()), };