,
}
-pub struct MediaChecker<'a, P>
+pub struct MediaChecker<'a, 'b, P>
where
P: FnMut(usize) -> bool,
{
+ ctx: &'a mut RequestContext<'b>,
mgr: &'a MediaManager,
- col_path: &'a Path,
progress_cb: P,
checked: usize,
progress_updated: Instant,
- i18n: &'a I18n,
- log: &'a Logger,
}
-impl<P> MediaChecker<'_, P>
+impl<P> MediaChecker<'_, '_, P>
where
P: FnMut(usize) -> bool,
{
- pub fn new<'a>(
+ pub(crate) fn new<'a, 'b>(
+ ctx: &'a mut RequestContext<'b>,
mgr: &'a MediaManager,
- col_path: &'a Path,
progress_cb: P,
- i18n: &'a I18n,
- log: &'a Logger,
- ) -> MediaChecker<'a, P> {
+ ) -> MediaChecker<'a, 'b, P> {
MediaChecker {
+ ctx,
mgr,
- col_path,
progress_cb,
checked: 0,
progress_updated: Instant::now(),
- i18n,
- log,
}
}
@@ -100,7 +93,7 @@ where
pub fn summarize_output(&self, output: &mut MediaCheckOutput) -> String {
let mut buf = String::new();
- let i = &self.i18n;
+ let i = &self.ctx.i18n;
// top summary area
if output.trash_count > 0 {
@@ -279,7 +272,7 @@ where
}
})?;
let fname = self.mgr.add_file(ctx, disk_fname, &data)?;
- debug!(self.log, "renamed"; "from"=>disk_fname, "to"=>&fname.as_ref());
+ debug!(self.ctx.log, "renamed"; "from"=>disk_fname, "to"=>&fname.as_ref());
assert_ne!(fname.as_ref(), disk_fname);
// remove the original file
@@ -373,7 +366,7 @@ where
self.mgr
.add_file(&mut self.mgr.dbctx(), fname.as_ref(), &data)?;
} else {
- debug!(self.log, "file disappeared while restoring trash"; "fname"=>fname.as_ref());
+ debug!(self.ctx.log, "file disappeared while restoring trash"; "fname"=>fname.as_ref());
}
fs::remove_file(dentry.path())?;
}
@@ -387,14 +380,11 @@ where
&mut self,
        renamed: &HashMap<String, String>,
    ) -> Result<HashSet<String>> {
- let mut db = open_or_create_collection_db(self.col_path)?;
- let trx = db.transaction()?;
-
let mut referenced_files = HashSet::new();
- let note_types = get_note_types(&trx)?;
+ let note_types = self.ctx.storage.all_note_types()?;
let mut collection_modified = false;
- for_every_note(&trx, |note| {
+ for_every_note(&self.ctx.storage.db, |note| {
self.checked += 1;
if self.checked % 10 == 0 {
self.maybe_fire_progress_cb()?;
@@ -403,10 +393,16 @@ where
            .get(&note.mid)
.ok_or_else(|| AnkiError::DBError {
info: "missing note type".to_string(),
+ kind: DBErrorKind::MissingEntity,
})?;
- if fix_and_extract_media_refs(note, &mut referenced_files, renamed)? {
+ if fix_and_extract_media_refs(
+ note,
+ &mut referenced_files,
+ renamed,
+ &self.mgr.media_folder,
+ )? {
// note was modified, needs saving
- set_note(&trx, note, nt)?;
+ set_note(&self.ctx.storage.db, note, nt)?;
collection_modified = true;
}
@@ -415,9 +411,8 @@ where
Ok(())
})?;
- if collection_modified {
- mark_collection_modified(&trx)?;
- trx.commit()?;
+ if !collection_modified {
+ self.ctx.should_commit = false;
}
Ok(referenced_files)
@@ -429,11 +424,17 @@ fn fix_and_extract_media_refs(
note: &mut Note,
    seen_files: &mut HashSet<String>,
    renamed: &HashMap<String, String>,
+    media_folder: &Path,
 ) -> Result<bool> {
let mut updated = false;
for idx in 0..note.fields().len() {
-        let field = normalize_and_maybe_rename_files(&note.fields()[idx], renamed, seen_files);
+        let field = normalize_and_maybe_rename_files(
+            &note.fields()[idx],
+ renamed,
+ seen_files,
+ media_folder,
+ );
if let Cow::Owned(field) = field {
// field was modified, need to save
note.set_field(idx, field)?;
@@ -450,6 +451,7 @@ fn normalize_and_maybe_rename_files<'a>(
field: &'a str,
    renamed: &HashMap<String, String>,
    seen_files: &mut HashSet<String>,
+ media_folder: &Path,
) -> Cow<'a, str> {
let refs = extract_media_refs(field);
    let mut field: Cow<str> = field.into();
@@ -466,7 +468,21 @@ fn normalize_and_maybe_rename_files<'a>(
if let Some(new_name) = renamed.get(fname.as_ref()) {
fname = new_name.to_owned().into();
}
- // if it was not in NFC or was renamed, update the field
+ // if the filename was in NFC and was not renamed as part of the
+ // media check, it may have already been renamed during a previous
+ // sync. If that's the case and the renamed version exists on disk,
+ // we'll need to update the field to match it. It may be possible
+ // to remove this check in the future once we can be sure all media
+ // files stored on AnkiWeb are in normalized form.
+ if matches!(fname, Cow::Borrowed(_)) {
+ if let Cow::Owned(normname) = normalize_nfc_filename(fname.as_ref().into()) {
+ let path = media_folder.join(&normname);
+ if path.exists() {
+ fname = normname.into();
+ }
+ }
+ }
+ // update the field if the filename was modified
if let Cow::Owned(ref new_name) = fname {
field = rename_media_ref_in_field(field.as_ref(), &media_ref, new_name).into();
}
@@ -510,41 +526,42 @@ fn extract_latex_refs(note: &Note, seen_files: &mut HashSet<String>, svg: bool)
}
#[cfg(test)]
-mod test {
+pub(crate) mod test {
+ pub(crate) const MEDIACHECK_ANKI2: &'static [u8] =
+ include_bytes!("../../tests/support/mediacheck.anki2");
+
+ use crate::collection::{open_collection, Collection};
use crate::err::Result;
use crate::i18n::I18n;
use crate::log;
- use crate::log::Logger;
use crate::media::check::{MediaCheckOutput, MediaChecker};
use crate::media::files::trash_folder;
use crate::media::MediaManager;
- use std::path::{Path, PathBuf};
+ use std::path::Path;
use std::{fs, io};
use tempfile::{tempdir, TempDir};
- fn common_setup() -> Result<(TempDir, MediaManager, PathBuf, Logger, I18n)> {
+ fn common_setup() -> Result<(TempDir, MediaManager, Collection)> {
let dir = tempdir()?;
let media_dir = dir.path().join("media");
fs::create_dir(&media_dir)?;
let media_db = dir.path().join("media.db");
let col_path = dir.path().join("col.anki2");
- fs::write(
- &col_path,
- &include_bytes!("../../tests/support/mediacheck.anki2")[..],
- )?;
+ fs::write(&col_path, MEDIACHECK_ANKI2)?;
- let mgr = MediaManager::new(&media_dir, media_db)?;
+ let mgr = MediaManager::new(&media_dir, media_db.clone())?;
let log = log::terminal();
-
let i18n = I18n::new(&["zz"], "dummy", log.clone());
- Ok((dir, mgr, col_path, log, i18n))
+ let col = open_collection(col_path, media_dir, media_db, false, i18n, log)?;
+
+ Ok((dir, mgr, col))
}
#[test]
fn media_check() -> Result<()> {
- let (_dir, mgr, col_path, log, i18n) = common_setup()?;
+ let (_dir, mgr, col) = common_setup()?;
// add some test files
fs::write(&mgr.media_folder.join("zerobytes"), "")?;
@@ -555,8 +572,13 @@ mod test {
fs::write(&mgr.media_folder.join("unused.jpg"), "foo")?;
let progress = |_n| true;
- let mut checker = MediaChecker::new(&mgr, &col_path, progress, &i18n, &log);
- let mut output = checker.check()?;
+
+ let (output, report) = col.transact(None, |ctx| {
+ let mut checker = MediaChecker::new(ctx, &mgr, progress);
+ let output = checker.check()?;
+ let summary = checker.summarize_output(&mut output.clone());
+ Ok((output, summary))
+ })?;
assert_eq!(
output,
@@ -576,7 +598,6 @@ mod test {
assert!(fs::metadata(&mgr.media_folder.join("foo[.jpg")).is_err());
assert!(fs::metadata(&mgr.media_folder.join("foo.jpg")).is_ok());
- let report = checker.summarize_output(&mut output);
assert_eq!(
report,
"Missing files: 1
@@ -616,14 +637,16 @@ Unused: unused.jpg
#[test]
fn trash_handling() -> Result<()> {
- let (_dir, mgr, col_path, log, i18n) = common_setup()?;
+ let (_dir, mgr, col) = common_setup()?;
let trash_folder = trash_folder(&mgr.media_folder)?;
fs::write(trash_folder.join("test.jpg"), "test")?;
let progress = |_n| true;
- let mut checker = MediaChecker::new(&mgr, &col_path, progress, &i18n, &log);
- checker.restore_trash()?;
+ col.transact(None, |ctx| {
+ let mut checker = MediaChecker::new(ctx, &mgr, progress);
+ checker.restore_trash()
+ })?;
// file should have been moved to media folder
    assert_eq!(files_in_dir(&trash_folder), Vec::<String>::new());
@@ -634,7 +657,10 @@ Unused: unused.jpg
// if we repeat the process, restoring should do the same thing if the contents are equal
fs::write(trash_folder.join("test.jpg"), "test")?;
- checker.restore_trash()?;
+ col.transact(None, |ctx| {
+ let mut checker = MediaChecker::new(ctx, &mgr, progress);
+ checker.restore_trash()
+ })?;
    assert_eq!(files_in_dir(&trash_folder), Vec::<String>::new());
assert_eq!(
files_in_dir(&mgr.media_folder),
@@ -643,7 +669,10 @@ Unused: unused.jpg
// but rename if required
fs::write(trash_folder.join("test.jpg"), "test2")?;
- checker.restore_trash()?;
+ col.transact(None, |ctx| {
+ let mut checker = MediaChecker::new(ctx, &mgr, progress);
+ checker.restore_trash()
+ })?;
    assert_eq!(files_in_dir(&trash_folder), Vec::<String>::new());
assert_eq!(
files_in_dir(&mgr.media_folder),
@@ -658,13 +687,17 @@ Unused: unused.jpg
#[test]
fn unicode_normalization() -> Result<()> {
- let (_dir, mgr, col_path, log, i18n) = common_setup()?;
+ let (_dir, mgr, col) = common_setup()?;
fs::write(&mgr.media_folder.join("ぱぱ.jpg"), "nfd encoding")?;
let progress = |_n| true;
- let mut checker = MediaChecker::new(&mgr, &col_path, progress, &i18n, &log);
- let mut output = checker.check()?;
+
+ let mut output = col.transact(None, |ctx| {
+ let mut checker = MediaChecker::new(ctx, &mgr, progress);
+ checker.check()
+ })?;
+
output.missing.sort();
if cfg!(target_vendor = "apple") {
diff --git a/rslib/src/media/files.rs b/rslib/src/media/files.rs
index bd8dfdcb4..791f0ad5e 100644
--- a/rslib/src/media/files.rs
+++ b/rslib/src/media/files.rs
@@ -84,7 +84,7 @@ pub(crate) fn normalize_filename(fname: &str) -> Cow<str> {
}
/// See normalize_filename(). This function expects NFC-normalized input.
-fn normalize_nfc_filename(mut fname: Cow<str>) -> Cow<str> {
+pub(crate) fn normalize_nfc_filename(mut fname: Cow<str>) -> Cow<str> {
if fname.chars().any(disallowed_char) {
fname = fname.replace(disallowed_char, "").into()
}
diff --git a/rslib/src/media/mod.rs b/rslib/src/media/mod.rs
index c0dcfcca8..a3351b95c 100644
--- a/rslib/src/media/mod.rs
+++ b/rslib/src/media/mod.rs
@@ -12,7 +12,6 @@ use std::path::{Path, PathBuf};
pub mod changetracker;
pub mod check;
-pub mod col;
pub mod database;
pub mod files;
pub mod sync;
diff --git a/rslib/src/media/sync.rs b/rslib/src/media/sync.rs
index 5bf7dbcc6..7717d8b4e 100644
--- a/rslib/src/media/sync.rs
+++ b/rslib/src/media/sync.rs
@@ -717,6 +717,17 @@ fn zip_files<'a>(
break;
}
+ #[cfg(target_vendor = "apple")]
+ {
+ use unicode_normalization::is_nfc;
+ if !is_nfc(&file.fname) {
+ // older Anki versions stored non-normalized filenames in the DB; clean them up
+ debug!(log, "clean up non-nfc entry"; "fname"=>&file.fname);
+ invalid_entries.push(&file.fname);
+ continue;
+ }
+ }
+
let file_data = if file.sha1.is_some() {
match data_for_file(media_folder, &file.fname) {
Ok(data) => data,
diff --git a/rslib/src/media/col.rs b/rslib/src/notes.rs
similarity index 52%
rename from rslib/src/media/col.rs
rename to rslib/src/notes.rs
index a563bb93e..cad1f614c 100644
--- a/rslib/src/media/col.rs
+++ b/rslib/src/notes.rs
@@ -1,17 +1,17 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
-/// Basic note reading/updating functionality for the media DB check.
-use crate::err::{AnkiError, Result};
+/// At the moment, this is just basic note reading/updating functionality for
+/// the media DB check.
+use crate::err::{AnkiError, DBErrorKind, Result};
use crate::text::strip_html_preserving_image_filenames;
-use crate::time::{i64_unix_millis, i64_unix_secs};
-use crate::types::{ObjID, Timestamp, Usn};
+use crate::time::i64_unix_secs;
+use crate::{
+ notetypes::NoteType,
+ types::{ObjID, Timestamp, Usn},
+};
use rusqlite::{params, Connection, Row, NO_PARAMS};
-use serde_aux::field_attributes::deserialize_number_from_string;
-use serde_derive::Deserialize;
-use std::collections::HashMap;
use std::convert::TryInto;
-use std::path::Path;
#[derive(Debug)]
pub(super) struct Note {
@@ -40,55 +40,13 @@ impl Note {
}
}
-fn field_checksum(text: &str) -> u32 {
+/// Text must be passed to strip_html_preserving_image_filenames() by
+/// caller prior to passing in here.
+pub(crate) fn field_checksum(text: &str) -> u32 {
let digest = sha1::Sha1::from(text).digest().bytes();
u32::from_be_bytes(digest[..4].try_into().unwrap())
}
-pub(super) fn open_or_create_collection_db(path: &Path) -> Result<Connection> {
- let db = Connection::open(path)?;
-
- db.pragma_update(None, "locking_mode", &"exclusive")?;
- db.pragma_update(None, "page_size", &4096)?;
- db.pragma_update(None, "cache_size", &(-40 * 1024))?;
- db.pragma_update(None, "legacy_file_format", &false)?;
- db.pragma_update(None, "journal", &"wal")?;
- db.set_prepared_statement_cache_capacity(5);
-
- Ok(db)
-}
-
-#[derive(Deserialize, Debug)]
-pub(super) struct NoteType {
- #[serde(deserialize_with = "deserialize_number_from_string")]
- id: ObjID,
- #[serde(rename = "sortf")]
- sort_field_idx: u16,
-
- #[serde(rename = "latexsvg", default)]
- latex_svg: bool,
-}
-
-impl NoteType {
- pub fn latex_uses_svg(&self) -> bool {
- self.latex_svg
- }
-}
-
-pub(super) fn get_note_types(db: &Connection) -> Result<HashMap<ObjID, NoteType>> {
-    let mut stmt = db.prepare("select models from col")?;
-    let note_types = stmt
-        .query_and_then(NO_PARAMS, |row| -> Result<HashMap<ObjID, NoteType>> {
-            let v: HashMap<ObjID, NoteType> = serde_json::from_str(row.get_raw(0).as_str()?)?;
- Ok(v)
- })?
- .next()
- .ok_or_else(|| AnkiError::DBError {
- info: "col table empty".to_string(),
- })??;
- Ok(note_types)
-}
-
#[allow(dead_code)]
fn get_note(db: &Connection, nid: ObjID) -> Result