Add option to tag notes with missing media (#2379)

* Keep track of notes with missing media files

* Add option to tag notes with missing media

* Update ftl/core/media-check.ftl (dae)
This commit is contained in:
RumovZ 2023-02-20 09:48:09 +01:00 committed by GitHub
parent e53f38a78e
commit 85aebae573
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 105 additions and 57 deletions

View file

@ -73,3 +73,7 @@ media-check-empty-trash = Empty Trash
# button to move deleted files from the trash back into the media folder # button to move deleted files from the trash back into the media folder
media-check-restore-trash = Restore Deleted media-check-restore-trash = Restore Deleted
media-check-check-media-action = Check Media media-check-check-media-action = Check Media
# a tag for notes with missing media files (must not contain whitespace)
media-check-missing-media-tag = missing-media
# add a tag to notes with missing media
media-check-add-tag = Tag Missing

View file

@ -20,8 +20,9 @@ service MediaService {
message CheckMediaResponse { message CheckMediaResponse {
repeated string unused = 1; repeated string unused = 1;
repeated string missing = 2; repeated string missing = 2;
string report = 3; repeated int64 missing_media_notes = 3;
bool have_trash = 4; string report = 4;
bool have_trash = 5;
} }
message TrashMediaFilesRequest { message TrashMediaFilesRequest {

View file

@ -13,8 +13,10 @@ import aqt.progress
from anki.collection import Collection, SearchNode from anki.collection import Collection, SearchNode
from anki.errors import Interrupted from anki.errors import Interrupted
from anki.media import CheckMediaResponse from anki.media import CheckMediaResponse
from anki.notes import NoteId
from aqt import gui_hooks from aqt import gui_hooks
from aqt.operations import QueryOp from aqt.operations import QueryOp
from aqt.operations.tag import add_tags_to_notes
from aqt.qt import * from aqt.qt import *
from aqt.utils import ( from aqt.utils import (
askUser, askUser,
@ -121,6 +123,14 @@ class MediaChecker:
qconnect(b.clicked, lambda c: self._on_trash_files(output.unused)) qconnect(b.clicked, lambda c: self._on_trash_files(output.unused))
if output.missing: if output.missing:
b = QPushButton(tr.media_check_add_tag())
b.setAutoDefault(False)
box.addButton(b, QDialogButtonBox.ButtonRole.RejectRole)
qconnect(
b.clicked,
lambda: add_missing_media_tag(self.mw, output.missing_media_notes),
)
if any(map(lambda x: x.startswith("latex-"), output.missing)): if any(map(lambda x: x.startswith("latex-"), output.missing)):
b = QPushButton(tr.media_check_render_latex()) b = QPushButton(tr.media_check_render_latex())
b.setAutoDefault(False) b.setAutoDefault(False)
@ -233,3 +243,11 @@ class MediaChecker:
tooltip(tr.media_check_trash_restored()) tooltip(tr.media_check_trash_restored())
self.mw.taskman.run_in_background(restore_trash, on_done) self.mw.taskman.run_in_background(restore_trash, on_done)
def add_missing_media_tag(parent: QWidget, missing_media_notes: Sequence[int]) -> None:
    """Add the translated "missing media" tag to the given notes.

    Each entry of ``missing_media_notes`` is wrapped in ``NoteId`` before
    being handed to ``add_tags_to_notes``; the resulting operation is started
    via ``run_in_background()``.
    """
    note_ids = [NoteId(nid) for nid in missing_media_notes]
    # The tag text comes from the translation layer.
    tag = tr.media_check_missing_media_tag()
    tagging_op = add_tags_to_notes(
        parent=parent,
        note_ids=note_ids,
        space_separated_tags=tag,
    )
    tagging_op.run_in_background()

View file

@ -1,6 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use super::notes::to_i64s;
use super::progress::Progress; use super::progress::Progress;
use super::Backend; use super::Backend;
use crate::media::check::MediaChecker; use crate::media::check::MediaChecker;
@ -28,6 +29,7 @@ impl MediaService for Backend {
Ok(pb::media::CheckMediaResponse { Ok(pb::media::CheckMediaResponse {
unused: output.unused, unused: output.unused,
missing: output.missing, missing: output.missing,
missing_media_notes: to_i64s(output.missing_media_notes),
report, report,
have_trash: output.trash_count > 0, have_trash: output.trash_count > 0,
}) })

View file

@ -172,3 +172,7 @@ impl NotesService for Backend {
pub(super) fn to_note_ids(ids: Vec<i64>) -> Vec<NoteId> { pub(super) fn to_note_ids(ids: Vec<i64>) -> Vec<NoteId> {
ids.into_iter().map(NoteId).collect() ids.into_iter().map(NoteId).collect()
} }
/// Converts a list of note IDs into plain `i64` values, keeping their order.
pub(super) fn to_i64s(ids: Vec<NoteId>) -> Vec<i64> {
    let mut raw_ids: Vec<i64> = Vec::with_capacity(ids.len());
    for note_id in ids {
        raw_ids.push(note_id.into());
    }
    raw_ids
}

View file

@ -11,17 +11,14 @@ use std::path::Path;
use anki_i18n::without_unicode_isolation; use anki_i18n::without_unicode_isolation;
use tracing::debug; use tracing::debug;
use crate::collection::Collection;
use crate::error::AnkiError;
use crate::error::DbErrorKind; use crate::error::DbErrorKind;
use crate::error::Result;
use crate::latex::extract_latex_expanding_clozes; use crate::latex::extract_latex_expanding_clozes;
use crate::media::files::data_for_file; use crate::media::files::data_for_file;
use crate::media::files::filename_if_normalized; use crate::media::files::filename_if_normalized;
use crate::media::files::normalize_nfc_filename; use crate::media::files::normalize_nfc_filename;
use crate::media::files::trash_folder; use crate::media::files::trash_folder;
use crate::media::MediaManager; use crate::media::MediaManager;
use crate::notes::Note; use crate::prelude::*;
use crate::sync::media::MAX_INDIVIDUAL_MEDIA_FILE_SIZE; use crate::sync::media::MAX_INDIVIDUAL_MEDIA_FILE_SIZE;
use crate::text::extract_media_refs; use crate::text::extract_media_refs;
use crate::text::normalize_to_nfc; use crate::text::normalize_to_nfc;
@ -32,6 +29,7 @@ use crate::text::REMOTE_FILENAME;
pub struct MediaCheckOutput { pub struct MediaCheckOutput {
pub unused: Vec<String>, pub unused: Vec<String>,
pub missing: Vec<String>, pub missing: Vec<String>,
pub missing_media_notes: Vec<NoteId>,
pub renamed: HashMap<String, String>, pub renamed: HashMap<String, String>,
pub dirs: Vec<String>, pub dirs: Vec<String>,
pub oversize: Vec<String>, pub oversize: Vec<String>,
@ -76,12 +74,13 @@ where
pub fn check(&mut self) -> Result<MediaCheckOutput> { pub fn check(&mut self) -> Result<MediaCheckOutput> {
let folder_check = self.check_media_folder()?; let folder_check = self.check_media_folder()?;
let referenced_files = self.check_media_references(&folder_check.renamed)?; let references = self.check_media_references(&folder_check.renamed)?;
let (unused, missing) = find_unused_and_missing(folder_check.files, referenced_files); let unused_and_missing = UnusedAndMissingFiles::new(folder_check.files, references);
let (trash_count, trash_bytes) = self.files_in_trash()?; let (trash_count, trash_bytes) = self.files_in_trash()?;
Ok(MediaCheckOutput { Ok(MediaCheckOutput {
unused, unused: unused_and_missing.unused,
missing, missing: unused_and_missing.missing,
missing_media_notes: unused_and_missing.missing_media_notes,
renamed: folder_check.renamed, renamed: folder_check.renamed,
dirs: folder_check.dirs, dirs: folder_check.dirs,
oversize: folder_check.oversize, oversize: folder_check.oversize,
@ -345,8 +344,8 @@ where
fn check_media_references( fn check_media_references(
&mut self, &mut self,
renamed: &HashMap<String, String>, renamed: &HashMap<String, String>,
) -> Result<HashSet<String>> { ) -> Result<HashMap<String, Vec<NoteId>>> {
let mut referenced_files = HashSet::new(); let mut referenced_files = HashMap::new();
let notetypes = self.ctx.get_all_notetypes()?; let notetypes = self.ctx.get_all_notetypes()?;
let mut collection_modified = false; let mut collection_modified = false;
@ -361,12 +360,14 @@ where
let nt = notetypes.get(&note.notetype_id).ok_or_else(|| { let nt = notetypes.get(&note.notetype_id).ok_or_else(|| {
AnkiError::db_error("missing note type", DbErrorKind::MissingEntity) AnkiError::db_error("missing note type", DbErrorKind::MissingEntity)
})?; })?;
if fix_and_extract_media_refs( let mut tracker = |fname| {
&mut note, referenced_files
&mut referenced_files, .entry(fname)
renamed, .or_insert_with(Vec::new)
&self.mgr.media_folder, .push(nid)
)? { };
if fix_and_extract_media_refs(&mut note, &mut tracker, renamed, &self.mgr.media_folder)?
{
// note was modified, needs saving // note was modified, needs saving
note.prepare_for_update(nt, false)?; note.prepare_for_update(nt, false)?;
note.set_modified(usn); note.set_modified(usn);
@ -375,7 +376,7 @@ where
} }
// extract latex // extract latex
extract_latex_refs(&note, &mut referenced_files, nt.config.latex_svg); extract_latex_refs(&note, &mut tracker, nt.config.latex_svg);
} }
if collection_modified { if collection_modified {
@ -390,7 +391,7 @@ where
/// Returns true if note was modified. /// Returns true if note was modified.
fn fix_and_extract_media_refs( fn fix_and_extract_media_refs(
note: &mut Note, note: &mut Note,
seen_files: &mut HashSet<String>, mut tracker: impl FnMut(String),
renamed: &HashMap<String, String>, renamed: &HashMap<String, String>,
media_folder: &Path, media_folder: &Path,
) -> Result<bool> { ) -> Result<bool> {
@ -400,7 +401,7 @@ fn fix_and_extract_media_refs(
let field = normalize_and_maybe_rename_files( let field = normalize_and_maybe_rename_files(
&note.fields()[idx], &note.fields()[idx],
renamed, renamed,
seen_files, &mut tracker,
media_folder, media_folder,
); );
if let Cow::Owned(field) = field { if let Cow::Owned(field) = field {
@ -418,7 +419,7 @@ fn fix_and_extract_media_refs(
fn normalize_and_maybe_rename_files<'a>( fn normalize_and_maybe_rename_files<'a>(
field: &'a str, field: &'a str,
renamed: &HashMap<String, String>, renamed: &HashMap<String, String>,
seen_files: &mut HashSet<String>, mut tracker: impl FnMut(String),
media_folder: &Path, media_folder: &Path,
) -> Cow<'a, str> { ) -> Cow<'a, str> {
let refs = extract_media_refs(field); let refs = extract_media_refs(field);
@ -455,7 +456,7 @@ fn normalize_and_maybe_rename_files<'a>(
field = rename_media_ref_in_field(field.as_ref(), &media_ref, new_name).into(); field = rename_media_ref_in_field(field.as_ref(), &media_ref, new_name).into();
} }
// and mark this filename as having been referenced // and mark this filename as having been referenced
seen_files.insert(fname.into_owned()); tracker(fname.into_owned());
} }
field field
@ -472,29 +473,43 @@ fn rename_media_ref_in_field(field: &str, media_ref: &MediaRef, new_name: &str)
field.replace(media_ref.full_ref, &updated_tag) field.replace(media_ref.full_ref, &updated_tag)
} }
/// Returns (unused, missing) struct UnusedAndMissingFiles {
fn find_unused_and_missing( unused: Vec<String>,
files: Vec<String>, missing: Vec<String>,
mut references: HashSet<String>, missing_media_notes: Vec<NoteId>,
) -> (Vec<String>, Vec<String>) {
let mut unused = vec![];
for file in files {
if !file.starts_with('_') && !references.contains(&file) {
unused.push(file);
} else {
references.remove(&file);
}
}
(unused, references.into_iter().collect())
} }
fn extract_latex_refs(note: &Note, seen_files: &mut HashSet<String>, svg: bool) { impl UnusedAndMissingFiles {
/// Splits the media folder listing and the note references into unused
/// and missing files.
///
/// A file in `files` is reported as unused when it does not start with
/// `_` and has no entry in `references`. Every file in `files` is removed
/// from `references`; whatever remains afterwards is reported as missing,
/// and the IDs of the notes referencing those names are collected without
/// duplicates.
fn new(files: Vec<String>, mut references: HashMap<String, Vec<NoteId>>) -> Self {
    let mut unused = Vec::new();
    for file in files {
        // Dropping the entry here leaves only references to absent files.
        let was_referenced = references.remove(&file).is_some();
        if !was_referenced && !file.starts_with('_') {
            unused.push(file);
        }
    }

    let mut missing = Vec::new();
    // A set, because one note may reference several missing files.
    let mut note_ids = HashSet::new();
    references.into_iter().for_each(|(fname, nids)| {
        missing.push(fname);
        note_ids.extend(nids);
    });

    let missing_media_notes = note_ids.into_iter().collect();
    Self {
        unused,
        missing,
        missing_media_notes,
    }
}
}
fn extract_latex_refs(note: &Note, mut tracker: impl FnMut(String), svg: bool) {
for field in note.fields() { for field in note.fields() {
let (_, extracted) = extract_latex_expanding_clozes(field, svg); let (_, extracted) = extract_latex_expanding_clozes(field, svg);
for e in extracted { for e in extracted {
seen_files.insert(e.fname); tracker(e.fname);
} }
} }
} }
@ -505,23 +520,14 @@ pub(crate) mod test {
include_bytes!("../../tests/support/mediacheck.anki2"); include_bytes!("../../tests/support/mediacheck.anki2");
use std::collections::HashMap; use std::collections::HashMap;
use std::fs;
use std::io;
use std::path::Path;
use tempfile::tempdir; use tempfile::tempdir;
use tempfile::TempDir; use tempfile::TempDir;
use super::normalize_and_maybe_rename_files; use super::*;
use crate::collection::Collection;
use crate::collection::CollectionBuilder; use crate::collection::CollectionBuilder;
use crate::error::Result;
use crate::io::create_dir; use crate::io::create_dir;
use crate::io::write_file; use crate::io::write_file;
use crate::media::check::MediaCheckOutput;
use crate::media::check::MediaChecker;
use crate::media::files::trash_folder;
use crate::media::MediaManager;
fn common_setup() -> Result<(TempDir, MediaManager, Collection)> { fn common_setup() -> Result<(TempDir, MediaManager, Collection)> {
let dir = tempdir()?; let dir = tempdir()?;
@ -565,6 +571,7 @@ pub(crate) mod test {
MediaCheckOutput { MediaCheckOutput {
unused: vec!["unused.jpg".into()], unused: vec!["unused.jpg".into()],
missing: vec!["ぱぱ.jpg".into()], missing: vec!["ぱぱ.jpg".into()],
missing_media_notes: vec![NoteId(1581236461568)],
renamed: vec![("foo[.jpg".into(), "foo.jpg".into())] renamed: vec![("foo[.jpg".into(), "foo.jpg".into())]
.into_iter() .into_iter()
.collect(), .collect(),
@ -687,6 +694,7 @@ Unused: unused.jpg
MediaCheckOutput { MediaCheckOutput {
unused: vec![], unused: vec![],
missing: vec!["foo[.jpg".into(), "normal.jpg".into()], missing: vec!["foo[.jpg".into(), "normal.jpg".into()],
missing_media_notes: vec![NoteId(1581236386334)],
renamed: Default::default(), renamed: Default::default(),
dirs: vec![], dirs: vec![],
oversize: vec![], oversize: vec![],
@ -702,6 +710,7 @@ Unused: unused.jpg
MediaCheckOutput { MediaCheckOutput {
unused: vec![], unused: vec![],
missing: vec!["foo[.jpg".into(), "normal.jpg".into()], missing: vec!["foo[.jpg".into(), "normal.jpg".into()],
missing_media_notes: vec![NoteId(1581236386334)],
renamed: vec![("ぱぱ.jpg".into(), "ぱぱ.jpg".into())] renamed: vec![("ぱぱ.jpg".into(), "ぱぱ.jpg".into())]
.into_iter() .into_iter()
.collect(), .collect(),
@ -718,21 +727,31 @@ Unused: unused.jpg
Ok(()) Ok(())
} }
/// Runs `normalize_and_maybe_rename_files` on `field` with an empty rename
/// map and `/tmp` as the media folder, and returns every filename that was
/// passed to the tracker callback.
fn normalize_and_maybe_rename_files_helper(field: &str) -> HashSet<String> {
    let no_renames = HashMap::new();
    let media_folder = Path::new("/tmp");
    let mut referenced = HashSet::new();
    let record = |fname: String| {
        referenced.insert(fname);
    };
    // Only the tracker calls matter here; the returned field text is ignored.
    normalize_and_maybe_rename_files(field, &no_renames, record, media_folder);
    referenced
}
#[test] #[test]
fn html_encoding() { fn html_encoding() {
let mut field = "[sound:a &amp; b.mp3]"; let mut field = "[sound:a &amp; b.mp3]";
let mut seen = Default::default(); let seen = normalize_and_maybe_rename_files_helper(field);
normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp"));
assert!(seen.contains("a & b.mp3")); assert!(seen.contains("a & b.mp3"));
field = r#"<img src="a&b.jpg">"#; field = r#"<img src="a&b.jpg">"#;
seen = Default::default(); let seen = normalize_and_maybe_rename_files_helper(field);
normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp"));
assert!(seen.contains("a&b.jpg")); assert!(seen.contains("a&b.jpg"));
field = r#"<img src="a&amp;b.jpg">"#; field = r#"<img src="a&amp;b.jpg">"#;
seen = Default::default(); let seen = normalize_and_maybe_rename_files_helper(field);
normalize_and_maybe_rename_files(field, &HashMap::new(), &mut seen, Path::new("/tmp"));
assert!(seen.contains("a&b.jpg")); assert!(seen.contains("a&b.jpg"));
} }
} }