diff --git a/rslib/src/media/check.rs b/rslib/src/media/check.rs
index de4dbe773..a6018c1dd 100644
--- a/rslib/src/media/check.rs
+++ b/rslib/src/media/check.rs
@@ -455,7 +455,7 @@ fn normalize_and_maybe_rename_files<'a>(
         }
 
         // normalize fname into NFC
-        let mut fname = normalize_to_nfc(media_ref.fname);
+        let mut fname = normalize_to_nfc(&media_ref.fname_decoded);
         // and look it up to see if it's been renamed
         if let Some(new_name) = renamed.get(fname.as_ref()) {
             fname = new_name.to_owned().into();
@@ -486,7 +486,13 @@ fn normalize_and_maybe_rename_files<'a>(
 }
 
 fn rename_media_ref_in_field(field: &str, media_ref: &MediaRef, new_name: &str) -> String {
-    let updated_tag = media_ref.full_ref.replace(media_ref.fname, new_name);
+    let new_name = if matches!(media_ref.fname_decoded, Cow::Owned(_)) {
+        // filename had quoted characters like &amp; - need to re-encode
+        htmlescape::encode_minimal(new_name)
+    } else {
+        new_name.into()
+    };
+    let updated_tag = media_ref.full_ref.replace(media_ref.fname, &new_name);
     field.replace(media_ref.full_ref, &updated_tag)
 }
 
@@ -522,6 +528,7 @@ pub(crate) mod test {
     pub(crate) const MEDIACHECK_ANKI2: &[u8] =
         include_bytes!("../../tests/support/mediacheck.anki2");
 
+    use super::normalize_and_maybe_rename_files;
     use crate::collection::{open_collection, Collection};
     use crate::err::Result;
     use crate::i18n::I18n;
@@ -530,7 +537,7 @@ pub(crate) mod test {
     use crate::media::files::trash_folder;
     use crate::media::MediaManager;
     use std::path::Path;
-    use std::{fs, io};
+    use std::{collections::HashMap, fs, io};
     use tempfile::{tempdir, TempDir};
 
     fn common_setup() -> Result<(TempDir, MediaManager, Collection)> {
@@ -730,4 +737,12 @@ Unused: unused.jpg
 
         Ok(())
     }
+
+    #[test]
+    fn html_encoding() {
+        let field = "[sound:a &amp; b.mp3]";
+        let mut seen = Default::default();
+        normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp"));
+        assert!(seen.contains("a & b.mp3"));
+    }
 }
diff --git a/rslib/src/text.rs b/rslib/src/text.rs
index 17cf98ec7..0793281c0 100644
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@@ -142,28 +142,36 @@ pub fn extract_av_tags<'a>(text: &'a str, question_side: bool) -> (Cow<'a, str>,
 pub(crate) struct MediaRef<'a> {
     pub full_ref: &'a str,
     pub fname: &'a str,
+    /// audio files may have things like &amp; that need decoding
+    pub fname_decoded: Cow<'a, str>,
 }
 
 pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
     let mut out = vec![];
 
     for caps in IMG_TAG.captures_iter(text) {
+        let fname = caps
+            .get(1)
+            .or_else(|| caps.get(2))
+            .or_else(|| caps.get(3))
+            .unwrap()
+            .as_str();
+        let fname_decoded = fname.into();
         out.push(MediaRef {
             full_ref: caps.get(0).unwrap().as_str(),
-            fname: caps
-                .get(1)
-                .or_else(|| caps.get(2))
-                .or_else(|| caps.get(3))
-                .unwrap()
-                .as_str(),
+            fname,
+            fname_decoded,
         });
     }
 
     for caps in AV_TAGS.captures_iter(text) {
         if let Some(m) = caps.get(1) {
+            let fname = m.as_str();
+            let fname_decoded = decode_entities(fname);
             out.push(MediaRef {
                 full_ref: caps.get(0).unwrap().as_str(),
-                fname: m.as_str(),
+                fname,
+                fname_decoded,
             });
         }
     }