@@ -11,6 +11,34 @@ use std::io::{BufRead, BufWriter};
1111use std:: { borrow:: Cow , fmt:: format, path:: Path } ;
1212use tempfile:: NamedTempFile ;
1313
/// Names of the HTML tags that Android string resources accept as styling markup.
///
/// Taken from
/// https://developer.android.com/guide/topics/resources/string-resource.html#StylingWithHTML
///
/// The names are stored without angle brackets; callers build `<name` / `</name>`
/// patterns from them.
const ANDROID_SUPPORTED_TAGS: &[&str] = &[
    "annotation",
    "a",
    "i",
    "cite",
    "dfn",
    "b",
    "em",
    "big",
    "small",
    "font",
    "tt",
    "s",
    "strike",
    "del",
    "u",
    "sup",
    "sub",
    "ul",
    "li",
    "br",
    "div",
    "span",
    "p",
];
41+
1442#[ derive( Debug ) ]
1543pub struct File {
1644 pub sections : Vec < Section > ,
@@ -301,18 +329,75 @@ fn maybe_escape_characters(input: &str) -> Cow<str> {
301329 let needs_escaping =
302330 input. contains ( "&" ) || input. contains ( "<" ) || input. contains ( "'" ) || input. contains ( "\" " ) ;
303331 if needs_escaping {
304- Cow :: Owned (
305- input
306- . replace ( "&" , "&" )
307- . replace ( "<" , "<" )
308- . replace ( "'" , "\\ '" )
309- . replace ( "\" " , "\\ \" " ) ,
310- )
332+ if ANDROID_SUPPORTED_TAGS
333+ . iter ( )
334+ . any ( |tag| input. contains ( & format ! ( "<{tag}" ) ) )
335+ {
336+ escape_input_with_html_tags ( input)
337+ } else {
338+ // fast path
339+ Cow :: Owned ( escape_with_no_html_tags ( input) )
340+ }
311341 } else {
312342 Cow :: Borrowed ( input)
313343 }
314344}
315345
/// Escapes `input` as plain text for an Android string resource.
///
/// * `&` becomes `&amp;` and `<` becomes `&lt;`, so the text stays valid XML;
/// * `'` and `"` are prefixed with a backslash (`\'`, `\"`);
/// * every other character is copied unchanged.
///
/// The whole input is treated as text: any markup it contains is escaped, not
/// preserved. Returns a newly allocated `String`.
fn escape_with_no_html_tags(input: &str) -> String {
    // Single pass over the characters: the output is never re-scanned, so the `&`
    // of an entity that was just emitted cannot be escaped a second time.
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '\'' => escaped.push_str("\\'"),
            '"' => escaped.push_str("\\\""),
            other => escaped.push(other),
        }
    }
    escaped
}
353+
354+ fn escape_input_with_html_tags ( input : & str ) -> Cow < str > {
355+ // contains [start,end) indexes of tag regions
356+ let mut tag_regions: Vec < ( usize , usize ) > = Vec :: new ( ) ;
357+ for tag in ANDROID_SUPPORTED_TAGS {
358+ let mut start = 0 ;
359+ while start < input. len ( ) {
360+ let Some ( s) = input[ start..] . find ( & format ! ( "<{tag}" ) ) else {
361+ break ;
362+ } ;
363+ let abs_start = start + s;
364+ let Some ( e) = input[ abs_start + tag. len ( ) + 1 ..] . find ( & format ! ( "{tag}>" ) ) else {
365+ break ;
366+ } ;
367+ // "annotation" + ">"
368+ let abs_end = ( abs_start + tag. len ( ) + 1 ) + e + ( tag. len ( ) + 1 ) ;
369+ tag_regions. push ( ( abs_start, abs_end) ) ;
370+ start = abs_end;
371+ }
372+ }
373+ if tag_regions. is_empty ( ) {
374+ return Cow :: Borrowed ( input) ;
375+ }
376+ let mut result = String :: new ( ) ;
377+ if tag_regions. len ( ) == 1 {
378+ let region = tag_regions[ 0 ] ;
379+ result. push_str ( & escape_with_no_html_tags ( & input[ 0 ..region. 0 ] ) ) ;
380+ result. push_str ( & input[ region. 0 ..region. 1 ] ) ;
381+ result. push_str ( & escape_with_no_html_tags ( & input[ region. 1 ..] ) )
382+ } else {
383+ tag_regions. sort_by_key ( |r| r. 0 ) ;
384+ // fully escape parts:
385+ // - before the first tag
386+ // - between tags
387+ // - after last tag
388+ result. push_str ( & escape_with_no_html_tags ( & input[ 0 ..tag_regions[ 0 ] . 0 ] ) ) ;
389+ result. push_str ( & input[ tag_regions[ 0 ] . 0 ..tag_regions[ 0 ] . 1 ] ) ;
390+ for trs in tag_regions. windows ( 2 ) {
391+ result. push_str ( & escape_with_no_html_tags ( & input[ trs[ 0 ] . 1 ..trs[ 1 ] . 0 ] ) ) ;
392+ result. push_str ( & input[ trs[ 1 ] . 0 ..trs[ 1 ] . 1 ] ) ;
393+ }
394+ result. push_str ( & escape_with_no_html_tags (
395+ & input[ tag_regions[ tag_regions. len ( ) - 1 ] . 1 ..] ,
396+ ) ) ;
397+ }
398+ return Cow :: Owned ( result) ;
399+ }
400+
316401#[ test]
317402fn parses_simple_string ( ) {
318403 let input = "Lorem ipsum" . to_string ( ) ;
@@ -350,9 +435,43 @@ fn parses_multiple_placeholders_keeping_order_if_present() {
350435
/// For every supported tag: regions of that tag (with and without attributes)
/// must come back untouched, while the surrounding text — quotes, an unsupported
/// tag, `<` and `&` — must come back escaped.
#[test]
fn parses_html_tags_and_related_characters_with_proper_escaping() {
    for tag in ANDROID_SUPPORTED_TAGS {
        let input = format!(
            "У нас было <{tag}>38</{tag}> попугаев в <{tag} link=\" hello\" >чистой</{tag}> \" упаковке\" , на <unsupported>которой</unsupported> было указано: 38 < 89 && 88 >= 55",
        );
        let result = parse_localized_string_value(input).unwrap();
        // Outside the supported tags: `"` -> `\"`, `<` -> `&lt;`, `&` -> `&amp;`.
        assert_eq!(
            result,
            format!("У нас было <{tag}>38</{tag}> попугаев в <{tag} link=\" hello\" >чистой</{tag}> \\\" упаковке\\\" , на &lt;unsupported>которой&lt;/unsupported> было указано: 38 &lt; 89 &amp;&amp; 88 >= 55")
        )
    }
}
449+
/// Two different supported tags in one string: the parsed value is expected to
/// be identical to the input.
#[test]
fn parses_html_tags_and_related_characters_with_proper_escaping_different_tags() {
    let expected = "У нас было <b>38</b> попугаев в <i>чистой</i> упаковке";
    let parsed = parse_localized_string_value(expected.to_string()).unwrap();
    assert_eq!(parsed, expected)
}
459+
/// The whole string is a single supported tag: the parsed value is expected to
/// be identical to the input.
#[test]
fn parses_html_tags_and_related_characters_with_proper_escaping_only_tag() {
    let expected = "<b>вот ведь</b>";
    let parsed = parse_localized_string_value(expected.to_string()).unwrap();
    assert_eq!(parsed, expected)
}
466+
/// Special characters in front of a single supported tag: the leading text must
/// be escaped (`&` -> `&amp;`, `<` -> `&lt;`, `"` -> `\"`, `'` -> `\'`) while the
/// `<a ...>...</a>` region, including its quoted attribute, stays untouched.
#[test]
fn parses_html_tags_and_related_characters_with_proper_escaping_one_tag() {
    let input = "Неожиданный амперсанд &, меньше < и кавычки \" и одинарные ' <a href=\" hello\" >вот ведь</a>".to_string();
    let result = parse_localized_string_value(input).unwrap();
    assert_eq!(
        result,
        "Неожиданный амперсанд &amp;, меньше &lt; и кавычки \\\" и одинарные \\' <a href=\" hello\" >вот ведь</a>",
    )
}
357476
358477#[ test]
0 commit comments