handle quoted html chars in media check

https://forums.ankiweb.net/t/unable-to-play-longer-audio-on-cards/1313/30
This commit is contained in:
Damien Elmes 2020-09-04 09:26:21 +10:00
parent dc1f489d3e
commit c82a084edf
2 changed files with 33 additions and 10 deletions

View file

@ -455,7 +455,7 @@ fn normalize_and_maybe_rename_files<'a>(
}
// normalize fname into NFC
let mut fname = normalize_to_nfc(media_ref.fname);
let mut fname = normalize_to_nfc(&media_ref.fname_decoded);
// and look it up to see if it's been renamed
if let Some(new_name) = renamed.get(fname.as_ref()) {
fname = new_name.to_owned().into();
@ -486,7 +486,13 @@ fn normalize_and_maybe_rename_files<'a>(
}
/// Return a copy of `field` in which the media reference `media_ref` points at
/// `new_name` instead of its current filename.
///
/// `new_name` is expected in plain (decoded) form. When the reference's
/// `fname_decoded` is `Cow::Owned` - taken here as the sign that the filename
/// was entity-decoded on extraction - the new name is HTML-escaped again
/// before being inserted, so the rewritten tag keeps the field's encoding.
fn rename_media_ref_in_field(field: &str, media_ref: &MediaRef, new_name: &str) -> String {
    let new_name: Cow<'_, str> = if matches!(media_ref.fname_decoded, Cow::Owned(_)) {
        // filename had quoted characters like &amp; - need to re-encode
        htmlescape::encode_minimal(new_name).into()
    } else {
        // nothing was decoded, so the name can be inserted as-is without copying
        new_name.into()
    };
    // Build the replacement tag from the raw (still encoded) filename, then swap
    // it into the field.
    // NOTE(review): `str::replace` substitutes every occurrence, so a filename
    // that also appears elsewhere inside the tag text would be rewritten there
    // too - confirm this is acceptable for the tag formats in use.
    let updated_tag = media_ref.full_ref.replace(media_ref.fname, &new_name);
    field.replace(media_ref.full_ref, &updated_tag)
}
@ -522,6 +528,7 @@ pub(crate) mod test {
pub(crate) const MEDIACHECK_ANKI2: &[u8] =
include_bytes!("../../tests/support/mediacheck.anki2");
use super::normalize_and_maybe_rename_files;
use crate::collection::{open_collection, Collection};
use crate::err::Result;
use crate::i18n::I18n;
@ -530,7 +537,7 @@ pub(crate) mod test {
use crate::media::files::trash_folder;
use crate::media::MediaManager;
use std::path::Path;
use std::{fs, io};
use std::{collections::HashMap, fs, io};
use tempfile::{tempdir, TempDir};
fn common_setup() -> Result<(TempDir, MediaManager, Collection)> {
@ -730,4 +737,12 @@ Unused: unused.jpg
Ok(())
}
/// A `[sound:...]` reference whose filename contains an HTML entity must be
/// recorded in the seen set under its decoded name.
#[test]
fn html_encoding() {
    let no_renames = HashMap::new();
    let mut seen_files = Default::default();
    let media_folder = Path::new("/tmp");
    normalize_and_maybe_rename_files(
        "[sound:a &amp; b.mp3]",
        &no_renames,
        &mut seen_files,
        media_folder,
    );
    assert!(seen_files.contains("a & b.mp3"));
}
}

View file

@ -142,28 +142,36 @@ pub fn extract_av_tags<'a>(text: &'a str, question_side: bool) -> (Cow<'a, str>,
/// A single media reference located by `extract_media_refs`.
pub(crate) struct MediaRef<'a> {
/// The complete matched tag text (the whole regex match).
pub full_ref: &'a str,
/// The filename exactly as it is written inside the tag.
pub fname: &'a str,
/// audio files may have things like &amp; that need decoding
///
/// For image tags this is `fname` unchanged; for audio/video tags it is
/// `fname` passed through `decode_entities`.
pub fname_decoded: Cow<'a, str>,
}
pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
let mut out = vec![];
for caps in IMG_TAG.captures_iter(text) {
let fname = caps
.get(1)
.or_else(|| caps.get(2))
.or_else(|| caps.get(3))
.unwrap()
.as_str();
let fname_decoded = fname.into();
out.push(MediaRef {
full_ref: caps.get(0).unwrap().as_str(),
fname: caps
.get(1)
.or_else(|| caps.get(2))
.or_else(|| caps.get(3))
.unwrap()
.as_str(),
fname,
fname_decoded,
});
}
for caps in AV_TAGS.captures_iter(text) {
if let Some(m) = caps.get(1) {
let fname = m.as_str();
let fname_decoded = decode_entities(fname);
out.push(MediaRef {
full_ref: caps.get(0).unwrap().as_str(),
fname: m.as_str(),
fname,
fname_decoded,
});
}
}