diff --git a/CONTRIBUTORS b/CONTRIBUTORS index d72b1c01e..7c99c809b 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -154,6 +154,7 @@ Lucio Sauer Gustavo Sales Shawn M Moore Marko Sisovic +Viktor Ricci ******************** diff --git a/rslib/src/media/check.rs b/rslib/src/media/check.rs index 13130f344..cfc42e73c 100644 --- a/rslib/src/media/check.rs +++ b/rslib/src/media/check.rs @@ -794,4 +794,28 @@ Unused: unused.jpg ); Ok(()) } + + /* Test: passes four field strings through normalize_and_maybe_rename_files_helper and asserts that the returned `seen` value contains "foo.jpg", "bar.jpg" and "double-in-single.jpg" but does not contain "illegal.jpg". NOTE(review): the second and third field literals as shown contain no file name, so the literals look truncated in this copy (markup possibly stripped) - confirm against the original patch before relying on them. */ #[test] + fn html_chevron_in_non_source_attribute() -> Result<()> { + let (_dir, _mgr, mut col) = common_setup()?; + let mut checker = col.media_checker()?; + + let field = "\"alt\" src=\"foo.jpg\">"; + let seen = normalize_and_maybe_rename_files_helper(&mut checker, field); + assert!(seen.contains("foo.jpg")); + + let field = ">a>l>t>"; + let seen = normalize_and_maybe_rename_files_helper(&mut checker, field); + assert!(seen.contains("bar.jpg")); + + let field = "\"alt>\""; + let seen = normalize_and_maybe_rename_files_helper(&mut checker, field); + assert!(seen.contains("double-in-single.jpg")); + + let field = "alt src='illegal.jpg'>"; + let seen = normalize_and_maybe_rename_files_helper(&mut checker, field); + assert!(!seen.contains("illegal.jpg")); + + Ok(()) + } } diff --git a/rslib/src/text.rs b/rslib/src/text.rs index bc0306ab3..3d681a1ac 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -104,7 +104,19 @@ lazy_static! { pub static ref HTML_MEDIA_TAGS: Regex = Regex::new( r#"(?xsi) # the start of the image, audio, or object tag - <\b(?:img|audio|video|object)\b[^>]+\b(?:src|data)\b= + <\b(?:img|audio|video|object)\b + + # any non-`>`, except inside `"` or `'` + (?: + [^>] + | + "[^"]+?" + | + '[^']+?' + )+ + + # capture `src` or `data` attribute + \b(?:src|data)\b= (?: # 1: double-quoted filename "