From 53fe7e574e39eb049ea748437ae228f7e3435884 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Thu, 19 Aug 2021 23:42:34 +1000 Subject: [PATCH] handle ampersand entities in image filenames In the old HTML editor, filenames were % escaped before feeding them to beautifulsoup, causing bare ampersands to be left alone. The new HTML editor reads content from the DOM, where a bare ampersand has been transformed into an &amp; entity, and that gets saved back into the field, so the media check now needs to deal with it for images as well. https://forums.ankiweb.net/t/causing-problems-with-image-names/12171 --- rslib/src/media/check.rs | 12 +++++++++++- rslib/src/text.rs | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/rslib/src/media/check.rs b/rslib/src/media/check.rs index c901583da..d7adfa361 100644 --- a/rslib/src/media/check.rs +++ b/rslib/src/media/check.rs @@ -723,9 +723,19 @@ Unused: unused.jpg #[test] fn html_encoding() { - let field = "[sound:a & b.mp3]"; + let mut field = "[sound:a & b.mp3]"; let mut seen = Default::default(); normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp")); assert!(seen.contains("a & b.mp3")); + + field = r#""#; + seen = Default::default(); + normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp")); + assert!(seen.contains("a&b.jpg")); + + field = r#""#; + seen = Default::default(); + normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp")); + assert!(seen.contains("a&b.jpg")); } } diff --git a/rslib/src/text.rs b/rslib/src/text.rs index bb4176861..26773b436 100644 --- a/rslib/src/text.rs +++ b/rslib/src/text.rs @@ -219,7 +219,7 @@ pub(crate) fn extract_media_refs(text: &str) -> Vec { .or_else(|| caps.get(3)) .unwrap() .as_str(); - let fname_decoded = fname.into(); + let fname_decoded = decode_entities(fname); out.push(MediaRef { full_ref: caps.get(0).unwrap().as_str(), fname,