Allow > inside HTML attributes (#2918)

* Allow > inside HTML attributes

* Don't add unnecessary (?:...) to HTML_MEDIA_TAGS
This commit is contained in:
Viktor Ricci 2024-01-01 05:09:30 +01:00 committed by GitHub
parent ddabbddeb1
commit f544bdd041
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 1 deletion

View file

@@ -154,6 +154,7 @@ Lucio Sauer <watermanpaint@posteo.net>
Gustavo Sales <gustavosmendes14@gmail.com>
Shawn M Moore <https://github.com/sartak>
Marko Sisovic <msisovic13@gmail.com>
Viktor Ricci <ricci@primateer.de>
********************

View file

@@ -794,4 +794,28 @@ Unused: unused.jpg
);
Ok(())
}
/// A `>` that sits inside a quoted attribute value must not stop the media
/// scan from reaching the `src` attribute, whereas a `>` outside of quotes
/// still terminates the tag.
#[test]
fn html_chevron_in_non_source_attribute() -> Result<()> {
    let (_dir, _mgr, mut col) = common_setup()?;
    let mut checker = col.media_checker()?;

    // (field markup, filename that has to be reported as seen)
    let detected = [
        // `>` inside a double-quoted attribute
        ("<img alt=\"alt>\" src=\"foo.jpg\">", "foo.jpg"),
        // several `>` inside a single-quoted attribute
        ("<img alt='>a>l>t>' src='bar.jpg'>", "bar.jpg"),
        // double quotes and `>` nested inside a single-quoted attribute
        (
            "<img alt='\"alt>\"' src='double-in-single.jpg'>",
            "double-in-single.jpg",
        ),
    ];
    for &(markup, filename) in &detected {
        let found = normalize_and_maybe_rename_files_helper(&mut checker, markup);
        assert!(found.contains(filename));
    }

    // the `>` after `alt='alt'` is unquoted, so the trailing `src` is not
    // part of the tag and must be ignored
    let found = normalize_and_maybe_rename_files_helper(
        &mut checker,
        "<img alt='alt'> src='illegal.jpg'>",
    );
    assert!(!found.contains("illegal.jpg"));

    Ok(())
}
}

View file

@@ -104,7 +104,19 @@ lazy_static! {
pub static ref HTML_MEDIA_TAGS: Regex = Regex::new(
r#"(?xsi)
# the start of the image, audio, or object tag
<\b(?:img|audio|video|object)\b[^>]+\b(?:src|data)\b=
<\b(?:img|audio|video|object)\b
# any non-`>`, except inside `"` or `'`
(?:
[^>]
|
"[^"]+?"
|
'[^']+?'
)+
# capture `src` or `data` attribute
\b(?:src|data)\b=
(?:
# 1: double-quoted filename
"