handle ampersand entities in image filenames

In the old HTML editor, filenames were %-escaped before being fed to
BeautifulSoup, which left bare ampersands alone. The new HTML
editor reads content from the DOM, where a bare ampersand has been
transformed into an &amp;amp; entity, and that gets saved back into the field,
so the media check now needs to deal with it for images as well.

https://forums.ankiweb.net/t/causing-problems-with-image-names/12171
This commit is contained in:
Damien Elmes 2021-08-19 23:42:34 +10:00
parent 739ec2798d
commit 53fe7e574e
2 changed files with 12 additions and 2 deletions

View file

@ -723,9 +723,19 @@ Unused: unused.jpg
// Checks which filenames end up in `seen` after running
// `normalize_and_maybe_rename_files` on fields that reference media whose
// names contain an ampersand, in both `[sound:...]` and `<img>` form.
#[test]
fn html_encoding() {
// NOTE(review): this binding is immediately shadowed by the `let mut` on the
// next line; it looks like the pre-change line of the diff -- confirm.
let field = "[sound:a & b.mp3]";
// NOTE(review): given the test's name, this literal presumably held the
// entity form of the ampersand rather than a bare `&` -- verify against the repo.
let mut field = "[sound:a & b.mp3]";
let mut seen = Default::default();
normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp"));
assert!(seen.contains("a & b.mp3"));
// Case 2: a bare ampersand inside an <img> src is expected to be recorded as-is.
field = r#"<img src="a&b.jpg">"#;
// Reset to an empty collection so the assertion only sees this call's result.
seen = Default::default();
normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp"));
assert!(seen.contains("a&b.jpg"));
// Case 3: an `&amp;` entity inside an <img> src is expected to be recorded
// with a plain `&`, i.e. the same name as in case 2.
field = r#"<img src="a&amp;b.jpg">"#;
seen = Default::default();
normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp"));
assert!(seen.contains("a&b.jpg"));
}
}

View file

@ -219,7 +219,7 @@ pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
.or_else(|| caps.get(3))
.unwrap()
.as_str();
let fname_decoded = fname.into();
let fname_decoded = decode_entities(fname);
out.push(MediaRef {
full_ref: caps.get(0).unwrap().as_str(),
fname,