Add audio & object tags to media check

Makes the media check recognize files in <audio> and <object> tags as used.

Both tags have been supported by the WebView (verified in Anki and AnkiDroid) for a very long time, and they are extremely useful to anyone with some web development knowledge.
This commit is contained in:
Andreas Reis 2020-10-14 01:56:57 +02:00
parent 1e37e6cabd
commit 6e9aaad11e
2 changed files with 14 additions and 10 deletions

View file

@ -34,13 +34,17 @@ def media_paths_from_col_path(col_path: str) -> Tuple[str, str]:
class MediaManager: class MediaManager:
soundRegexps = [r"(?i)(\[sound:(?P<fname>[^]]+)\])"] soundRegexps = [r"(?i)(\[sound:(?P<fname>[^]]+)\])"]
imgRegexps = [ htmlRegexps = [
# src element quoted case # src element quoted case
r"(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)", r"(?i)(<[img|audio][^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
# unquoted case # unquoted case
r"(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)", r"(?i)(<[img|audio][^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
# data attribute quoted case
r"(?i)(<object[^>]* data=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
# unquoted case
r"(?i)(<object[^>]* data=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
] ]
regexps = soundRegexps + imgRegexps regexps = soundRegexps + htmlRegexps
def __init__(self, col: anki.collection.Collection, server: bool) -> None: def __init__(self, col: anki.collection.Collection, server: bool) -> None:
self.col = col.weakref() self.col = col.weakref()
@ -173,7 +177,7 @@ class MediaManager:
return tag return tag
return tag.replace(fname, fn(fname)) return tag.replace(fname, fn(fname))
for reg in self.imgRegexps: for reg in self.htmlRegexps:
string = re.sub(reg, repl, string) string = re.sub(reg, repl, string)
return string return string

View file

@ -32,10 +32,10 @@ lazy_static! {
)) ))
.unwrap(); .unwrap();
static ref IMG_TAG: Regex = Regex::new( static ref HTML_TAGS: Regex = Regex::new(
r#"(?xsi) r#"(?xsi)
# the start of the image tag # the start of the image, audio, or object tag
<img[^>]+src= <\b(?:img|audio|object)\b[^>]+\b(?:src|data)\b=
(?: (?:
# 1: double-quoted filename # 1: double-quoted filename
" "
@ -149,7 +149,7 @@ pub(crate) struct MediaRef<'a> {
pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> { pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
let mut out = vec![]; let mut out = vec![];
for caps in IMG_TAG.captures_iter(text) { for caps in HTML_TAGS.captures_iter(text) {
let fname = caps let fname = caps
.get(1) .get(1)
.or_else(|| caps.get(2)) .or_else(|| caps.get(2))
@ -214,7 +214,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
} }
pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> { pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
let without_fnames = IMG_TAG.replace_all(html, r" ${1}${2}${3} "); let without_fnames = HTML_TAGS.replace_all(html, r" ${1}${2}${3} ");
let without_html = HTML.replace_all(&without_fnames, ""); let without_html = HTML.replace_all(&without_fnames, "");
// no changes? // no changes?
if let Cow::Borrowed(b) = without_html { if let Cow::Borrowed(b) = without_html {