Recognise and check for media referenced in <source> tags (#3763)

* recognise and check <source> tags for media

* add test
This commit is contained in:
llama 2025-01-25 15:16:24 +08:00 committed by GitHub
parent 8ec94e281c
commit 873c4e617d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 27 additions and 4 deletions

View file

@ -33,9 +33,9 @@ class MediaManager(DeprecatedNamesMixin):
sound_regexps = [r"(?i)(\[sound:(?P<fname>[^]]+)\])"]
html_media_regexps = [
# src element quoted case
r"(?i)(<(?:img|audio)\b[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
r"(?i)(<(?:img|audio|source)\b[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
# unquoted case
r"(?i)(<(?:img|audio)\b[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
r"(?i)(<(?:img|audio|source)\b[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
# data attribute quoted case
r"(?i)(<object\b[^>]* data=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
# unquoted case

View file

@ -846,4 +846,27 @@ Unused: unused.jpg
Ok(())
}
/// Filenames given in the `src` attribute of `<source>` tags must be present
/// in what `normalize_and_maybe_rename_files_helper` returns for the field.
#[test]
fn source_tags() -> Result<()> {
    let (_dir, _mgr, mut col) = common_setup()?;
    let mut checker = col.media_checker()?;

    // `<source>` children of `<audio>`, single-quoted; the second tag places
    // `type` ahead of `src`, so `src` is not the first attribute.
    let audio_markup = "<audio controls><source src='foo-ss.mp3' /><source type='audio/ogg' src='bar-ss.ogg' /></audio>";
    let audio_refs = normalize_and_maybe_rename_files_helper(&mut checker, audio_markup);
    assert!(audio_refs.contains("foo-ss.mp3"));
    assert!(audio_refs.contains("bar-ss.ogg"));

    // `<source>` alongside an `<img>` inside `<picture>`, double-quoted and
    // spread over several lines. The literal's lines are kept unindented so
    // the string contents stay exactly as they were.
    let picture_markup = r#"
<picture>
<source src="foo-dd.webp" media="(orientation: portrait)" />
<img src="bar-dd.gif" alt="fancy jif" />
</picture>
"#;
    let picture_refs = normalize_and_maybe_rename_files_helper(&mut checker, picture_markup);
    assert!(picture_refs.contains("foo-dd.webp"));
    assert!(picture_refs.contains("bar-dd.gif"));

    Ok(())
}
}

View file

@ -107,8 +107,8 @@ static HTML_LINEBREAK_TAGS: LazyLock<Regex> = LazyLock::new(|| {
pub static HTML_MEDIA_TAGS: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r#"(?xsi)
# the start of the image, audio, or object tag
<\b(?:img|audio|video|object)\b
# the start of the image, audio, object, or source tag
<\b(?:img|audio|video|object|source)\b
# any non-`>`, except inside `"` or `'`
(?: