text: HTML tags and attributes are case insensitive (fix #1021)
Use eq_ignore_ascii_case when parsing HTML tags. Different versions of Flash may export HTML tags with different cases, so this will work a little better; however, we'll need a true HTML parser to handle this robustly (for opening and closing tags with different cases, for example).
This commit is contained in:
parent
febada8a8e
commit
f55bac2014
|
@ -27,7 +27,8 @@ fn process_html_entity(src: &str) -> Cow<str> {
|
|||
for (i, ch) in src.char_indices() {
|
||||
if let Some(start) = entity_start {
|
||||
if ch == ';' {
|
||||
match &src[start + 1..i] {
|
||||
let s = src[start + 1..i].to_ascii_lowercase();
|
||||
match s.as_str() {
|
||||
"amp" => result_str.push('&'),
|
||||
"lt" => result_str.push('<'),
|
||||
"gt" => result_str.push('>'),
|
||||
|
@ -36,7 +37,7 @@ fn process_html_entity(src: &str) -> Cow<str> {
|
|||
"nbsp" => result_str.push('\u{00A0}'),
|
||||
s if s.len() >= 2 && s.as_bytes()[0] == b'#' => {
|
||||
// Number entity: &#nnnn; or &#xhhhh;
|
||||
let (digits, radix) = if s.as_bytes()[1] == b'x' {
|
||||
let (digits, radix) = if src.as_bytes()[1] == b'x' {
|
||||
// Only trailing 4 hex digits are used.
|
||||
let start = usize::max(s.len(), 6) - 4;
|
||||
(&s[start..], 16)
|
||||
|
@ -270,33 +271,46 @@ impl TextFormat {
|
|||
/// in this format.
|
||||
pub fn from_presentational_markup(node: XMLNode<'_>, mut tf: TextFormat) -> Self {
|
||||
match node.tag_name() {
|
||||
Some(name) if name == XMLName::from_str("p") => {
|
||||
match node.attribute_value(&XMLName::from_str("align")).as_deref() {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("p")) => {
|
||||
match node
|
||||
.attribute_value_ignore_ascii_case(&XMLName::from_str("align"))
|
||||
.as_deref()
|
||||
{
|
||||
Some("left") => tf.align = Some(swf::TextAlign::Left),
|
||||
Some("center") => tf.align = Some(swf::TextAlign::Center),
|
||||
Some("right") => tf.align = Some(swf::TextAlign::Right),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Some(name) if name == XMLName::from_str("a") => {
|
||||
if let Some(href) = node.attribute_value(&XMLName::from_str("href")) {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("a")) => {
|
||||
if let Some(href) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("href"))
|
||||
{
|
||||
tf.url = Some(href);
|
||||
}
|
||||
|
||||
if let Some(target) = node.attribute_value(&XMLName::from_str("target")) {
|
||||
if let Some(target) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("target"))
|
||||
{
|
||||
tf.target = Some(target);
|
||||
}
|
||||
}
|
||||
Some(name) if name == XMLName::from_str("font") => {
|
||||
if let Some(face) = node.attribute_value(&XMLName::from_str("face")) {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("font")) => {
|
||||
if let Some(face) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("face"))
|
||||
{
|
||||
tf.font = Some(face);
|
||||
}
|
||||
|
||||
if let Some(size) = node.attribute_value(&XMLName::from_str("size")) {
|
||||
if let Some(size) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("size"))
|
||||
{
|
||||
tf.size = size.parse().ok();
|
||||
}
|
||||
|
||||
if let Some(color) = node.attribute_value(&XMLName::from_str("color")) {
|
||||
if let Some(color) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("color"))
|
||||
{
|
||||
if color.starts_with('#') {
|
||||
let rval = color.get(1..3).and_then(|v| u8::from_str_radix(v, 16).ok());
|
||||
let gval = color.get(3..5).and_then(|v| u8::from_str_radix(v, 16).ok());
|
||||
|
@ -309,13 +323,13 @@ impl TextFormat {
|
|||
}
|
||||
|
||||
if let Some(letter_spacing) =
|
||||
node.attribute_value(&XMLName::from_str("letterSpacing"))
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("letterSpacing"))
|
||||
{
|
||||
tf.letter_spacing = letter_spacing.parse().ok();
|
||||
}
|
||||
|
||||
tf.kerning = match node
|
||||
.attribute_value(&XMLName::from_str("kerning"))
|
||||
.attribute_value_ignore_ascii_case(&XMLName::from_str("kerning"))
|
||||
.as_deref()
|
||||
{
|
||||
Some("1") => Some(true),
|
||||
|
@ -323,43 +337,54 @@ impl TextFormat {
|
|||
_ => tf.kerning,
|
||||
}
|
||||
}
|
||||
Some(name) if name == XMLName::from_str("b") => {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("b")) => {
|
||||
tf.bold = Some(true);
|
||||
}
|
||||
Some(name) if name == XMLName::from_str("i") => {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("i")) => {
|
||||
tf.italic = Some(true);
|
||||
}
|
||||
Some(name) if name == XMLName::from_str("u") => {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("u")) => {
|
||||
tf.underline = Some(true);
|
||||
}
|
||||
Some(name) if name == XMLName::from_str("li") => {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("li")) => {
|
||||
tf.bullet = Some(true);
|
||||
}
|
||||
Some(name) if name == XMLName::from_str("textformat") => {
|
||||
Some(name) if name.eq_ignore_ascii_case(&XMLName::from_str("textformat")) => {
|
||||
//TODO: Spec says these are all in twips. That doesn't seem to
|
||||
//match Flash 8.
|
||||
if let Some(left_margin) = node.attribute_value(&XMLName::from_str("leftmargin")) {
|
||||
if let Some(left_margin) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("leftmargin"))
|
||||
{
|
||||
tf.left_margin = left_margin.parse().ok();
|
||||
}
|
||||
|
||||
if let Some(right_margin) = node.attribute_value(&XMLName::from_str("rightmargin"))
|
||||
if let Some(right_margin) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("rightmargin"))
|
||||
{
|
||||
tf.right_margin = right_margin.parse().ok();
|
||||
}
|
||||
|
||||
if let Some(indent) = node.attribute_value(&XMLName::from_str("indent")) {
|
||||
if let Some(indent) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("indent"))
|
||||
{
|
||||
tf.indent = indent.parse().ok();
|
||||
}
|
||||
|
||||
if let Some(blockindent) = node.attribute_value(&XMLName::from_str("blockindent")) {
|
||||
if let Some(blockindent) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("blockindent"))
|
||||
{
|
||||
tf.block_indent = blockindent.parse().ok();
|
||||
}
|
||||
|
||||
if let Some(leading) = node.attribute_value(&XMLName::from_str("leading")) {
|
||||
if let Some(leading) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("leading"))
|
||||
{
|
||||
tf.leading = leading.parse().ok();
|
||||
}
|
||||
|
||||
if let Some(tabstops) = node.attribute_value(&XMLName::from_str("tabstops")) {
|
||||
if let Some(tabstops) =
|
||||
node.attribute_value_ignore_ascii_case(&XMLName::from_str("tabstops"))
|
||||
{
|
||||
tf.tab_stops = Some(
|
||||
tabstops
|
||||
.split(',')
|
||||
|
@ -1215,14 +1240,30 @@ impl FormatSpans {
|
|||
for step in tree.as_node().walk().unwrap() {
|
||||
match step {
|
||||
Step::In(node)
|
||||
if node.tag_name().unwrap().node_name() == "sbr"
|
||||
|| node.tag_name().unwrap().node_name() == "br" =>
|
||||
if node
|
||||
.tag_name()
|
||||
.unwrap()
|
||||
.node_name()
|
||||
.eq_ignore_ascii_case("sbr")
|
||||
|| node
|
||||
.tag_name()
|
||||
.unwrap()
|
||||
.node_name()
|
||||
.eq_ignore_ascii_case("br") =>
|
||||
{
|
||||
self.replace_text(self.text.len(), self.text.len(), "\n", format_stack.last());
|
||||
}
|
||||
Step::Out(node)
|
||||
if node.tag_name().unwrap().node_name() == "sbr"
|
||||
|| node.tag_name().unwrap().node_name() == "br" => {}
|
||||
if node
|
||||
.tag_name()
|
||||
.unwrap()
|
||||
.node_name()
|
||||
.eq_ignore_ascii_case("sbr")
|
||||
|| node
|
||||
.tag_name()
|
||||
.unwrap()
|
||||
.node_name()
|
||||
.eq_ignore_ascii_case("br") => {}
|
||||
Step::In(node) => format_stack.push(TextFormat::from_presentational_markup(
|
||||
node,
|
||||
format_stack
|
||||
|
@ -1240,8 +1281,16 @@ impl FormatSpans {
|
|||
last_successful_format = format_stack.last().cloned();
|
||||
}
|
||||
Step::Out(node)
|
||||
if node.tag_name().unwrap().node_name() == "p"
|
||||
|| node.tag_name().unwrap().node_name() == "li" =>
|
||||
if node
|
||||
.tag_name()
|
||||
.unwrap()
|
||||
.node_name()
|
||||
.eq_ignore_ascii_case("p")
|
||||
|| node
|
||||
.tag_name()
|
||||
.unwrap()
|
||||
.node_name()
|
||||
.eq_ignore_ascii_case("li") =>
|
||||
{
|
||||
self.replace_text(
|
||||
self.text.len(),
|
||||
|
|
|
@ -85,6 +85,20 @@ impl XMLName {
|
|||
Cow::Borrowed(&self.name)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compares both names as case-insensitve ASCII (for use in HTML parsing).
|
||||
/// TODO: We shouldn't need this when we have a proper HTML parser.
|
||||
pub fn eq_ignore_ascii_case(&self, other: &XMLName) -> bool {
|
||||
if !self.name.eq_ignore_ascii_case(&other.name) {
|
||||
return false;
|
||||
}
|
||||
|
||||
match (&self.namespace, &other.namespace) {
|
||||
(None, None) => true,
|
||||
(Some(a), Some(b)) => a.eq_ignore_ascii_case(&b),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for XMLName {
|
||||
|
|
|
@ -1042,6 +1042,19 @@ impl<'gc> XMLNode<'gc> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Retrieve the value of a single attribute on this node, case-insensitively.
|
||||
///
|
||||
/// TODO: Probably won't need this when we have a proper HTML parser.
|
||||
pub fn attribute_value_ignore_ascii_case(self, name: &XMLName) -> Option<String> {
|
||||
match &*self.0.read() {
|
||||
XMLNodeData::Element { attributes, .. } => attributes
|
||||
.iter()
|
||||
.find(|(k, _)| k.eq_ignore_ascii_case(name))
|
||||
.map(|(_, v)| v.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the value of a single attribute on this node.
|
||||
///
|
||||
/// If the node does not contain attributes, then this function silently fails.
|
||||
|
|
Loading…
Reference in New Issue