Compare checksums when importing colpkgs

This commit is contained in:
RumovZ 2022-04-28 22:32:12 +02:00
parent 930f4212b2
commit 146fd2a6b6
7 changed files with 73 additions and 19 deletions

View file

@ -23,6 +23,7 @@ message ImportCollectionPackageRequest {
string col_path = 1;
string backup_path = 2;
string media_folder = 3;
string media_db = 4;
}
message ExportCollectionPackageRequest {

View file

@ -87,8 +87,12 @@ def import_collection_package_op(
def op(_: Collection) -> None:
col_path = mw.pm.collectionPath()
media_folder = os.path.join(mw.pm.profileFolder(), "collection.media")
media_db = os.path.join(mw.pm.profileFolder(), "collection.media.db2")
mw.backend.import_collection_package(
col_path=col_path, backup_path=path, media_folder=media_folder
col_path=col_path,
backup_path=path,
media_folder=media_folder,
media_db=media_db,
)
return QueryOp(parent=mw, op=op, success=lambda _: success()).with_backend_progress(

View file

@ -1,6 +1,8 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::path::Path;
use super::{progress::Progress, Backend};
pub(super) use crate::backend_proto::importexport_service::Service as ImportExportService;
use crate::{
@ -42,8 +44,10 @@ impl ImportExportService for Backend {
import_colpkg(
&input.backup_path,
&input.col_path,
&input.media_folder,
Path::new(&input.media_folder),
Path::new(&input.media_db),
self.import_progress_fn(),
&self.log,
)
.map(Into::into)
}

View file

@ -21,14 +21,17 @@ use crate::{
ImportProgress, IncrementalProgress,
},
io::{atomic_rename, tempfile_in_parent_of},
media::MediaManager,
prelude::*,
};
pub fn import_colpkg(
colpkg_path: &str,
target_col_path: &str,
target_media_folder: &str,
target_media_folder: &Path,
media_db: &Path,
mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
log: &Logger,
) -> Result<()> {
progress_fn(ImportProgress::Collection)?;
let col_path = PathBuf::from(target_col_path);
@ -43,8 +46,14 @@ pub fn import_colpkg(
check_collection_and_mod_schema(tempfile.path())?;
progress_fn(ImportProgress::Collection)?;
let media_folder = Path::new(target_media_folder);
restore_media(&meta, progress_fn, &mut archive, media_folder)?;
restore_media(
&meta,
progress_fn,
&mut archive,
target_media_folder,
media_db,
log,
)?;
atomic_rename(tempfile, &col_path, true)
}
@ -70,14 +79,26 @@ fn restore_media(
mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
archive: &mut ZipArchive<File>,
media_folder: &Path,
media_db: &Path,
log: &Logger,
) -> Result<()> {
let media_entries = extract_media_entries(meta, archive)?;
if media_entries.is_empty() {
return Ok(());
}
std::fs::create_dir_all(media_folder)?;
let media_manager = MediaManager::new(media_folder, media_db)?;
let mut db_progress_fn = |u| progress_fn(ImportProgress::MediaCheck(u)).is_ok();
media_manager.register_changes(&mut db_progress_fn, log)?;
let mut get_checksum = media_manager.checksum_getter();
let mut progress = IncrementalProgress::new(|u| progress_fn(ImportProgress::Media(u)));
for entry in &media_entries {
progress.increment()?;
maybe_restore_media_file(meta, media_folder, archive, entry)?;
maybe_restore_media_file(meta, media_folder, archive, entry, &mut get_checksum)?;
}
Ok(())
@ -88,10 +109,11 @@ fn maybe_restore_media_file(
media_folder: &Path,
archive: &mut ZipArchive<File>,
entry: &SafeMediaEntry,
get_checksum: &mut impl FnMut(&str) -> Result<Option<Sha1Hash>>,
) -> Result<()> {
let file_path = entry.file_path(media_folder);
let mut zip_file = entry.fetch_file(archive)?;
let already_exists = entry.is_equal_to(meta, &zip_file, &file_path);
let already_exists = entry.is_equal_to(meta, &zip_file, &file_path, get_checksum)?;
if !already_exists {
restore_media_file(meta, &mut zip_file, &file_path)?;
};

View file

@ -8,8 +8,8 @@ use std::path::Path;
use tempfile::tempdir;
use crate::{
collection::CollectionBuilder, import_export::package::import_colpkg, media::MediaManager,
prelude::*,
collection::CollectionBuilder, import_export::package::import_colpkg, log::terminal,
media::MediaManager, prelude::*,
};
fn collection_with_media(dir: &Path, name: &str) -> Result<Collection> {
@ -42,18 +42,25 @@ fn roundtrip() -> Result<()> {
let col = collection_with_media(dir, name)?;
let colpkg_name = dir.join(format!("{name}.colpkg"));
col.export_colpkg(&colpkg_name, true, legacy, |_| Ok(()))?;
// import into a new collection
let anki2_name = dir
.join(format!("{name}.anki2"))
.to_string_lossy()
.into_owned();
let import_media_dir = dir.join(format!("{name}.media"));
std::fs::create_dir_all(&import_media_dir)?;
let import_media_db = dir.join(format!("{name}.mdb"));
MediaManager::new(&import_media_dir, &import_media_db)?;
import_colpkg(
&colpkg_name.to_string_lossy(),
&anki2_name,
import_media_dir.to_str().unwrap(),
&import_media_dir,
&import_media_db,
|_| Ok(()),
&terminal(),
)?;
// confirm collection imported
let col = CollectionBuilder::new(&anki2_name).build()?;
assert_eq!(

View file

@ -25,6 +25,7 @@ use crate::{
/// Like [MediaEntry], but with a safe filename and set zip filename.
pub(super) struct SafeMediaEntry {
pub(super) name: String,
#[allow(dead_code)]
pub(super) size: u32,
pub(super) sha1: Sha1Hash,
pub(super) index: usize,
@ -90,16 +91,14 @@ impl SafeMediaEntry {
meta: &Meta,
self_zipped: &ZipFile,
other_path: &Path,
) -> bool {
// TODO: check hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147)
let self_size = if meta.media_list_is_hashmap() {
self_zipped.size()
get_checksum: &mut impl FnMut(&str) -> Result<Option<Sha1Hash>>,
) -> Result<bool> {
if meta.media_list_is_hashmap() {
Ok(fs::metadata(other_path)
.map_or(false, |metadata| metadata.len() == self_zipped.size()))
} else {
self.size as u64
};
fs::metadata(other_path)
.map(|metadata| metadata.len() as u64 == self_size)
.unwrap_or_default()
get_checksum(&self.name).map(|opt| opt.map_or(false, |sha1| sha1 == self.sha1))
}
}
pub(super) fn copy_from_archive(

View file

@ -167,4 +167,21 @@ impl MediaManager {
ChangeTracker::new(&self.media_folder, progress, log).register_changes(&mut dbctx)?;
dbctx.all_checksums()
}
/// Returns a closure that looks up the stored checksum for a media file name.
///
/// The closure takes ownership of a database context obtained from
/// `self.dbctx()` and reuses it for every lookup. For each `fname` it yields:
/// - `Ok(Some(sha1))` if an entry exists and carries a checksum,
/// - `Ok(None)` if there is no entry, or the entry has no checksum,
/// - `Err(_)` if the entry lookup itself fails.
pub fn checksum_getter(&self) -> impl FnMut(&str) -> Result<Option<Sha1Hash>> + '_ {
    let mut dbctx = self.dbctx();
    move |fname: &str| match dbctx.get_entry(fname) {
        // A missing entry and an entry without a checksum both collapse to `None`.
        Ok(maybe_entry) => Ok(maybe_entry.and_then(|found| found.sha1)),
        Err(err) => Err(err),
    }
}
/// Runs a [ChangeTracker] over this manager's media folder, handing it a
/// database context obtained from `self.dbctx()`.
///
/// `progress` is passed through to the tracker together with `log`.
/// NOTE(review): presumably `progress` receives a running count and returns
/// `false` to request an abort — confirm against `ChangeTracker`.
///
/// Any error reported by the tracker is returned unchanged.
pub fn register_changes(
    &self,
    progress: &mut impl FnMut(usize) -> bool,
    log: &Logger,
) -> Result<()> {
    let mut dbctx = self.dbctx();
    ChangeTracker::new(&self.media_folder, progress, log).register_changes(&mut dbctx)
}
}