Skip to content

Commit 8f73318

Browse files
committed
Fix issues with escaping html tags
The following additional rules now apply: - Do not escape any tags supported by Android (see https://developer.android.com/guide/topics/resources/string-resource.html#StylingWithHTML). - Do not escape double quotes within supported tags.
1 parent 851a282 commit 8f73318

File tree

2 files changed

+138
-12
lines changed

2 files changed

+138
-12
lines changed

src/main.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,22 @@ fn main() -> Result<()> {
1919
run_android_gen_pipeline(&args.input_dir, &args.output_dir, &args.default_lang)
2020
}
2121

22-
fn run_android_gen_pipeline(input_dir: &String, output_dir: &String, default_lang: &Option<String>) -> Result<()> {
22+
fn run_android_gen_pipeline(
23+
input_dir: &String,
24+
output_dir: &String,
25+
default_lang: &Option<String>,
26+
) -> Result<()> {
2327
for src in fs::read_dir(input_dir)? {
2428
let src = src?;
2529
if src.file_type()?.is_file() {
2630
let parsed = parser::parse(src.path()).map_err(|err| anyhow!(err))?;
2731
let generated = generator::generate(&parsed)?;
2832
generated.write(
29-
output_dir,
30-
src.path().file_stem().and_then(|os_str| os_str.to_str()).ok_or(anyhow!("Cannot extract file name"))?,
33+
output_dir,
34+
src.path()
35+
.file_stem()
36+
.and_then(|os_str| os_str.to_str())
37+
.ok_or(anyhow!("Cannot extract file name"))?,
3138
default_lang,
3239
)?;
3340
}

src/parse.rs

Lines changed: 128 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,34 @@ use std::io::{BufRead, BufWriter};
1111
use std::{borrow::Cow, fmt::format, path::Path};
1212
use tempfile::NamedTempFile;
1313

14+
/// HTML tag names that the escaping code in this file leaves unescaped.
///
/// The list is taken from
/// https://developer.android.com/guide/topics/resources/string-resource.html#StylingWithHTML
const ANDROID_SUPPORTED_TAGS: &[&str] = &[
    "annotation", "a", "i", "cite", "dfn", "b", "em", "big",
    "small", "font", "tt", "s", "strike", "del", "u", "sup",
    "sub", "ul", "li", "br", "div", "span", "p",
];
41+
1442
#[derive(Debug)]
1543
pub struct File {
1644
pub sections: Vec<Section>,
@@ -301,18 +329,75 @@ fn maybe_escape_characters(input: &str) -> Cow<str> {
301329
let needs_escaping =
302330
input.contains("&") || input.contains("<") || input.contains("'") || input.contains("\"");
303331
if needs_escaping {
304-
Cow::Owned(
305-
input
306-
.replace("&", "&amp;")
307-
.replace("<", "&lt;")
308-
.replace("'", "\\'")
309-
.replace("\"", "\\\""),
310-
)
332+
if ANDROID_SUPPORTED_TAGS
333+
.iter()
334+
.any(|tag| input.contains(&format!("<{tag}")))
335+
{
336+
escape_input_with_html_tags(input)
337+
} else {
338+
// fast path
339+
Cow::Owned(escape_with_no_html_tags(input))
340+
}
311341
} else {
312342
Cow::Borrowed(input)
313343
}
314344
}
315345

346+
/// Escapes every special character in `input`, treating the whole string as
/// plain text (no tag is preserved).
///
/// Replacements: `&` -> `&amp;`, `<` -> `&lt;`, `'` -> `\'`, `"` -> `\"`.
/// All other characters, including `>`, are copied unchanged.
fn escape_with_no_html_tags(input: &str) -> String {
    let mut escaped = String::new();
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '\'' => escaped.push_str("\\'"),
            '"' => escaped.push_str("\\\""),
            other => escaped.push(other),
        }
    }
    escaped
}
353+
354+
fn escape_input_with_html_tags(input: &str) -> Cow<str> {
355+
// contains [start,end) indexes of tag regions
356+
let mut tag_regions: Vec<(usize, usize)> = Vec::new();
357+
for tag in ANDROID_SUPPORTED_TAGS {
358+
let mut start = 0;
359+
while start < input.len() {
360+
let Some(s) = input[start..].find(&format!("<{tag}")) else {
361+
break;
362+
};
363+
let abs_start = start + s;
364+
let Some(e) = input[abs_start + tag.len() + 1..].find(&format!("{tag}>")) else {
365+
break;
366+
};
367+
// "annotation" + ">"
368+
let abs_end = (abs_start + tag.len() + 1) + e + (tag.len() + 1);
369+
tag_regions.push((abs_start, abs_end));
370+
start = abs_end;
371+
}
372+
}
373+
if tag_regions.is_empty() {
374+
return Cow::Borrowed(input);
375+
}
376+
let mut result = String::new();
377+
if tag_regions.len() == 1 {
378+
let region = tag_regions[0];
379+
result.push_str(&escape_with_no_html_tags(&input[0..region.0]));
380+
result.push_str(&input[region.0..region.1]);
381+
result.push_str(&escape_with_no_html_tags(&input[region.1..]))
382+
} else {
383+
tag_regions.sort_by_key(|r| r.0);
384+
// fully escape parts:
385+
// - before the first tag
386+
// - between tags
387+
// - after last tag
388+
result.push_str(&escape_with_no_html_tags(&input[0..tag_regions[0].0]));
389+
result.push_str(&input[tag_regions[0].0..tag_regions[0].1]);
390+
for trs in tag_regions.windows(2) {
391+
result.push_str(&escape_with_no_html_tags(&input[trs[0].1..trs[1].0]));
392+
result.push_str(&input[trs[1].0..trs[1].1]);
393+
}
394+
result.push_str(&escape_with_no_html_tags(
395+
&input[tag_regions[tag_regions.len() - 1].1..],
396+
));
397+
}
398+
return Cow::Owned(result);
399+
}
400+
316401
#[test]
317402
fn parses_simple_string() {
318403
let input = "Lorem ipsum".to_string();
@@ -350,9 +435,43 @@ fn parses_multiple_placeholders_keeping_order_if_present() {
350435

351436
#[test]
352437
fn parses_html_tags_and_related_characters_with_proper_escaping() {
353-
let input = "У нас было <b>38</b> попугаев в <i>чистой</i> упаковке, на которой было указано: 38 < 89 && 88 >= 55".to_string();
438+
for tag in ANDROID_SUPPORTED_TAGS {
439+
let input = format!(
440+
"У нас было <{tag}>38</{tag}> попугаев в <{tag} link=\"hello\">чистой</{tag}> \"упаковке\", на <unsupported>которой</unsupported> было указано: 38 < 89 && 88 >= 55",
441+
);
442+
let result = parse_localized_string_value(input).unwrap();
443+
assert_eq!(
444+
result,
445+
format!("У нас было <{tag}>38</{tag}> попугаев в <{tag} link=\"hello\">чистой</{tag}> \\\"упаковке\\\", на &lt;unsupported>которой&lt;/unsupported> было указано: 38 &lt; 89 &amp;&amp; 88 >= 55")
446+
)
447+
}
448+
}
449+
450+
#[test]
fn parses_html_tags_and_related_characters_with_proper_escaping_different_tags() {
    // Two different supported tags in one string: the parsed value must be
    // identical to the input.
    let text = "У нас было <b>38</b> попугаев в <i>чистой</i> упаковке";
    let parsed = parse_localized_string_value(text.to_string()).unwrap();
    assert_eq!(
        parsed,
        "У нас было <b>38</b> попугаев в <i>чистой</i> упаковке",
    );
}
459+
460+
#[test]
fn parses_html_tags_and_related_characters_with_proper_escaping_only_tag() {
    // The whole input is a single supported element: nothing may be escaped.
    let parsed = parse_localized_string_value("<b>вот ведь</b>".to_string()).unwrap();
    assert_eq!(parsed, "<b>вот ведь</b>");
}
466+
467+
#[test]
fn parses_html_tags_and_related_characters_with_proper_escaping_one_tag() {
    // Special characters before the tag are escaped; the `<a>` element and the
    // double quotes of its attribute are kept as written.
    let source = String::from(
        "Неожиданный амперсанд &, меньше < и кавычки \" и одинарные ' <a href=\"hello\">вот ведь</a>",
    );
    let expected = "Неожиданный амперсанд &amp;, меньше &lt; и кавычки \\\" и одинарные \\' <a href=\"hello\">вот ведь</a>";
    let parsed = parse_localized_string_value(source).unwrap();
    assert_eq!(parsed, expected);
}
357476

358477
#[test]

0 commit comments

Comments
 (0)