diff --git a/pylib/anki/media.py b/pylib/anki/media.py index 060dc847d..28abac172 100644 --- a/pylib/anki/media.py +++ b/pylib/anki/media.py @@ -34,13 +34,17 @@ def media_paths_from_col_path(col_path: str) -> Tuple[str, str]: class MediaManager: soundRegexps = [r"(?i)(\[sound:(?P[^]]+)\])"] - imgRegexps = [ + htmlRegexps = [ # src element quoted case - r"(?i)(]* src=(?P[\"'])(?P[^>]+?)(?P=str)[^>]*>)", + r"(?i)(<[img|audio][^>]* src=(?P[\"'])(?P[^>]+?)(?P=str)[^>]*>)", # unquoted case - r"(?i)(]* src=(?!['\"])(?P[^ >]+)[^>]*?>)", + r"(?i)(<[img|audio][^>]* src=(?!['\"])(?P[^ >]+)[^>]*?>)", + # src element quoted case + r"(?i)(]* data=(?P[\"'])(?P[^>]+?)(?P=str)[^>]*>)", + # unquoted case + r"(?i)(]* data=(?!['\"])(?P[^ >]+)[^>]*?>)", ] - regexps = soundRegexps + imgRegexps + regexps = soundRegexps + htmlRegexps def __init__(self, col: anki.collection.Collection, server: bool) -> None: self.col = col.weakref() @@ -173,7 +177,7 @@ class MediaManager: return tag return tag.replace(fname, fn(fname)) - for reg in self.imgRegexps: + for reg in self.htmlRegexps: string = re.sub(reg, repl, string) return string diff --git a/rslib/src/text.rs b/rslib/src/text.rs index 0793281c0..6d8fa5322 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -32,10 +32,10 @@ lazy_static! { )) .unwrap(); - static ref IMG_TAG: Regex = Regex::new( + static ref HTML_TAGS: Regex = Regex::new( r#"(?xsi) - # the start of the image tag - ]+src= + # the start of the image, audio, or object tag + <\b(?:img|audio|object)\b[^>]+\b(?:src|data)\b= (?: # 1: double-quoted filename " @@ -149,7 +149,7 @@ pub(crate) struct MediaRef<'a> { pub(crate) fn extract_media_refs(text: &str) -> Vec { let mut out = vec![]; - for caps in IMG_TAG.captures_iter(text) { + for caps in HTML_TAGS.captures_iter(text) { let fname = caps .get(1) .or_else(|| caps.get(2)) @@ -214,7 +214,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag { } pub fn strip_html_preserving_image_filenames(html: &str) -> Cow { - let without_fnames = IMG_TAG.replace_all(html, r" ${1}${2}${3} "); + let without_fnames = HTML_TAGS.replace_all(html, r" ${1}${2}${3} "); let without_html = HTML.replace_all(&without_fnames, ""); // no changes? if let Cow::Borrowed(b) = without_html {