From 146fd2a6b6b2c2da10927aeefc60533a8ac5c111 Mon Sep 17 00:00:00 2001 From: RumovZ Date: Thu, 28 Apr 2022 22:32:12 +0200 Subject: [PATCH] Compare checksums when importing colpkgs --- proto/anki/import_export.proto | 1 + qt/aqt/import_export/importing.py | 6 +++- rslib/src/backend/import_export.rs | 6 +++- .../import_export/package/colpkg/import.rs | 32 ++++++++++++++++--- .../src/import_export/package/colpkg/tests.rs | 13 ++++++-- rslib/src/import_export/package/media.rs | 17 +++++----- rslib/src/media/mod.rs | 17 ++++++++++ 7 files changed, 73 insertions(+), 19 deletions(-) diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto index c54911144..2b5bb74ba 100644 --- a/proto/anki/import_export.proto +++ b/proto/anki/import_export.proto @@ -23,6 +23,7 @@ message ImportCollectionPackageRequest { string col_path = 1; string backup_path = 2; string media_folder = 3; + string media_db = 4; } message ExportCollectionPackageRequest { diff --git a/qt/aqt/import_export/importing.py b/qt/aqt/import_export/importing.py index 39f092ad9..3260817d7 100644 --- a/qt/aqt/import_export/importing.py +++ b/qt/aqt/import_export/importing.py @@ -87,8 +87,12 @@ def import_collection_package_op( def op(_: Collection) -> None: col_path = mw.pm.collectionPath() media_folder = os.path.join(mw.pm.profileFolder(), "collection.media") + media_db = os.path.join(mw.pm.profileFolder(), "collection.media.db2") mw.backend.import_collection_package( - col_path=col_path, backup_path=path, media_folder=media_folder + col_path=col_path, + backup_path=path, + media_folder=media_folder, + media_db=media_db, ) return QueryOp(parent=mw, op=op, success=lambda _: success()).with_backend_progress( diff --git a/rslib/src/backend/import_export.rs b/rslib/src/backend/import_export.rs index d8c2739cb..e54ec01f7 100644 --- a/rslib/src/backend/import_export.rs +++ b/rslib/src/backend/import_export.rs @@ -1,6 +1,8 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +use std::path::Path; + use super::{progress::Progress, Backend}; pub(super) use crate::backend_proto::importexport_service::Service as ImportExportService; use crate::{ @@ -42,8 +44,10 @@ impl ImportExportService for Backend { import_colpkg( &input.backup_path, &input.col_path, - &input.media_folder, + Path::new(&input.media_folder), + Path::new(&input.media_db), self.import_progress_fn(), + &self.log, ) .map(Into::into) } diff --git a/rslib/src/import_export/package/colpkg/import.rs b/rslib/src/import_export/package/colpkg/import.rs index fa86eb59f..c07e7a071 100644 --- a/rslib/src/import_export/package/colpkg/import.rs +++ b/rslib/src/import_export/package/colpkg/import.rs @@ -21,14 +21,17 @@ use crate::{ ImportProgress, IncrementalProgress, }, io::{atomic_rename, tempfile_in_parent_of}, + media::MediaManager, prelude::*, }; pub fn import_colpkg( colpkg_path: &str, target_col_path: &str, - target_media_folder: &str, + target_media_folder: &Path, + media_db: &Path, mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, + log: &Logger, ) -> Result<()> { progress_fn(ImportProgress::Collection)?; let col_path = PathBuf::from(target_col_path); @@ -43,8 +46,14 @@ pub fn import_colpkg( check_collection_and_mod_schema(tempfile.path())?; progress_fn(ImportProgress::Collection)?; - let media_folder = Path::new(target_media_folder); - restore_media(&meta, progress_fn, &mut archive, media_folder)?; + restore_media( + &meta, + progress_fn, + &mut archive, + target_media_folder, + media_db, + log, + )?; atomic_rename(tempfile, &col_path, true) } @@ -70,14 +79,26 @@ fn restore_media( mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, archive: &mut ZipArchive, media_folder: &Path, + media_db: &Path, + log: &Logger, ) -> Result<()> { let media_entries = extract_media_entries(meta, archive)?; + if media_entries.is_empty() { + return Ok(()); + } + std::fs::create_dir_all(media_folder)?; + + let media_manager = MediaManager::new(media_folder, media_db)?; + let mut db_progress_fn = |u| progress_fn(ImportProgress::MediaCheck(u)).is_ok(); + media_manager.register_changes(&mut db_progress_fn, log)?; + + let mut get_checksum = media_manager.checksum_getter(); let mut progress = IncrementalProgress::new(|u| progress_fn(ImportProgress::Media(u))); for entry in &media_entries { progress.increment()?; - maybe_restore_media_file(meta, media_folder, archive, entry)?; + maybe_restore_media_file(meta, media_folder, archive, entry, &mut get_checksum)?; } Ok(()) @@ -88,10 +109,11 @@ fn maybe_restore_media_file( media_folder: &Path, archive: &mut ZipArchive, entry: &SafeMediaEntry, + get_checksum: &mut impl FnMut(&str) -> Result>, ) -> Result<()> { let file_path = entry.file_path(media_folder); let mut zip_file = entry.fetch_file(archive)?; - let already_exists = entry.is_equal_to(meta, &zip_file, &file_path); + let already_exists = entry.is_equal_to(meta, &zip_file, &file_path, get_checksum)?; if !already_exists { restore_media_file(meta, &mut zip_file, &file_path)?; }; diff --git a/rslib/src/import_export/package/colpkg/tests.rs b/rslib/src/import_export/package/colpkg/tests.rs index c901677d8..58e59b84f 100644 --- a/rslib/src/import_export/package/colpkg/tests.rs +++ b/rslib/src/import_export/package/colpkg/tests.rs @@ -8,8 +8,8 @@ use std::path::Path; use tempfile::tempdir; use crate::{ - collection::CollectionBuilder, import_export::package::import_colpkg, media::MediaManager, - prelude::*, + collection::CollectionBuilder, import_export::package::import_colpkg, log::terminal, + media::MediaManager, prelude::*, }; fn collection_with_media(dir: &Path, name: &str) -> Result { @@ -42,18 +42,25 @@ fn roundtrip() -> Result<()> { let col = collection_with_media(dir, name)?; let colpkg_name = dir.join(format!("{name}.colpkg")); col.export_colpkg(&colpkg_name, true, legacy, |_| Ok(()))?; + // import into a new collection let anki2_name = dir .join(format!("{name}.anki2")) .to_string_lossy() .into_owned(); let import_media_dir = dir.join(format!("{name}.media")); + std::fs::create_dir_all(&import_media_dir)?; + let import_media_db = dir.join(format!("{name}.mdb")); + MediaManager::new(&import_media_dir, &import_media_db)?; import_colpkg( &colpkg_name.to_string_lossy(), &anki2_name, - import_media_dir.to_str().unwrap(), + &import_media_dir, + &import_media_db, |_| Ok(()), + &terminal(), )?; + // confirm collection imported let col = CollectionBuilder::new(&anki2_name).build()?; assert_eq!( diff --git a/rslib/src/import_export/package/media.rs b/rslib/src/import_export/package/media.rs index fcf5461d4..3fb370347 100644 --- a/rslib/src/import_export/package/media.rs +++ b/rslib/src/import_export/package/media.rs @@ -25,6 +25,7 @@ use crate::{ /// Like [MediaEntry], but with a safe filename and set zip filename. pub(super) struct SafeMediaEntry { pub(super) name: String, + #[allow(dead_code)] pub(super) size: u32, pub(super) sha1: Sha1Hash, pub(super) index: usize, @@ -90,16 +91,14 @@ impl SafeMediaEntry { meta: &Meta, self_zipped: &ZipFile, other_path: &Path, - ) -> bool { - // TODO: check hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147) - let self_size = if meta.media_list_is_hashmap() { - self_zipped.size() + get_checksum: &mut impl FnMut(&str) -> Result>, + ) -> Result { + if meta.media_list_is_hashmap() { + Ok(fs::metadata(other_path) + .map_or(false, |metadata| metadata.len() == self_zipped.size())) } else { - self.size as u64 - }; - fs::metadata(other_path) - .map(|metadata| metadata.len() as u64 == self_size) - .unwrap_or_default() + get_checksum(&self.name).map(|opt| opt.map_or(false, |sha1| sha1 == self.sha1)) + } } pub(super) fn copy_from_archive( diff --git a/rslib/src/media/mod.rs b/rslib/src/media/mod.rs index 9feeaafcd..de3fb73e6 100644 --- a/rslib/src/media/mod.rs +++ b/rslib/src/media/mod.rs @@ -167,4 +167,21 @@ impl MediaManager { ChangeTracker::new(&self.media_folder, progress, log).register_changes(&mut dbctx)?; dbctx.all_checksums() } + + pub fn checksum_getter(&self) -> impl FnMut(&str) -> Result> + '_ { + let mut dbctx = self.dbctx(); + move |fname: &str| { + dbctx + .get_entry(fname) + .map(|opt| opt.and_then(|entry| entry.sha1)) + } + } + + pub fn register_changes( + &self, + progress: &mut impl FnMut(usize) -> bool, + log: &Logger, + ) -> Result<()> { + ChangeTracker::new(&self.media_folder, progress, log).register_changes(&mut self.dbctx()) + } }