From c2e8d89fc6c6b5f3eaedb0fd0605e3d1adea3a55 Mon Sep 17 00:00:00 2001
From: Damien Elmes
Date: Thu, 17 Mar 2022 15:11:23 +1000
Subject: [PATCH] Colpkg fixes (#1722)

* Fix legacy colpkg import; disable v3 import/export; add roundtrip test

The test revealed that we weren't decompressing the media files on v3
import. That's easy to fix, but it means all files need decompressing
even when they already exist, which is not ideal - it would be better
to store the size/checksum in the metadata instead.

* Switch media and meta to protobuf; re-enable v3 import/export

- Fixed media not being decompressed on import
- The uncompressed size and checksum are now included for each media
entry, so that we can quickly check whether a given file needs to be
extracted. We're still just doing a naive size comparison on colpkg
import at the moment, but we may want to use a checksum in the future,
and will need a checksum for apkg imports.
- Checksums can't be efficiently encoded in JSON, so the media list has
been switched to protobuf to reduce the space requirements.
- The meta file has been switched to protobuf as well, for consistency.
This means any colpkg files exported with beta7 will be unreadable.

* Avoid integer version comparisons

* Re-enable v3 test

* Apply suggestions from code review

Co-authored-by: RumovZ

* Add export_colpkg() method to Collection

More discoverable, and easier to call from unit tests.

* Split import/export code out into separate folders

Currently colpkg/*.rs contain some routines that will be useful for
apkg import/export as well; in the future we can refactor them into a
separate file in the parent module.

* Return a proper error when media import fails

This tripped me up when writing the earlier unit test - I had called
the equivalent of import_colpkg()?, and it was returning a string error
that I didn't notice. In practice this should result in the same text
being shown in the UI, but it skips the tooltip.

* Automatically create media folder on import

* Move roundtrip test into separate file; check collection too

* Remove zstd version suffix

Prevents a warning shown each time Rust Analyzer is used to check the
code.
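For reviewers, a rough sketch (not part of the patch) of how the new
protobuf media map is assembled. `MediaEntries`/`MediaEntry` are the
prost-generated types from the new import_export.proto, and
`sha1_of_data` is the existing helper in rslib; the wrapper function
itself is illustrative only. The point of the format change: a 20-byte
SHA-1 goes straight into a `bytes` field, where JSON would have forced
a hex or base64 string.

    use prost::Message;

    use crate::backend_proto::{media_entries::MediaEntry, MediaEntries};
    use crate::media::files::sha1_of_data;

    /// Build and encode the media list as written to the `media` zip entry.
    fn encode_media_map(files: &[(String, Vec<u8>)]) -> Vec<u8> {
        let entries = MediaEntries {
            entries: files
                .iter()
                .map(|(name, data)| MediaEntry {
                    name: name.clone(),
                    size: data.len() as u32,
                    sha1: sha1_of_data(data).to_vec(),
                })
                .collect(),
        };
        let mut buf = Vec::new();
        // Encoding into a Vec cannot run out of buffer space.
        entries.encode(&mut buf).expect("infallible for Vec<u8>");
        buf
    }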
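And a usage sketch of the reworked public API (paths are hypothetical;
this mirrors the roundtrip test added in this patch):

    use crate::collection::CollectionBuilder;
    use crate::import_export::package::import_colpkg;

    // export_colpkg() consumes the collection, closing it before writing
    // the package (include_media = true, legacy = false).
    let col = CollectionBuilder::new("/path/to/collection.anki2").build()?;
    col.export_colpkg("/path/to/backup.colpkg", true, false, |_files_done| ())?;

    // Re-import, replacing the collection file and extracting any media
    // files whose sizes don't match what's already on disk.
    import_colpkg(
        "/path/to/backup.colpkg",
        "/path/to/collection.anki2",
        "/path/to/collection.media",
        |_progress| Ok(()),
    )?;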
Co-authored-by: RumovZ --- proto/anki/backend.proto | 1 + proto/anki/collection.proto | 14 -- proto/anki/import_export.proto | 52 +++++ pylib/anki/_backend/genbackend.py | 3 + pylib/anki/collection.py | 2 +- qt/aqt/importing.py | 8 +- rslib/Cargo.toml | 2 +- rslib/src/backend/collection.rs | 65 +----- rslib/src/backend/import_export.rs | 67 ++++++ rslib/src/backend/mod.rs | 5 + rslib/src/backend/progress.rs | 2 +- rslib/src/backend_proto.rs | 1 + rslib/src/collection/backup.rs | 170 +-------------- rslib/src/collection/mod.rs | 1 - rslib/src/error/mod.rs | 10 +- rslib/src/import_export/mod.rs | 10 + .../package/colpkg/export.rs} | 158 ++++++++------ .../import_export/package/colpkg/import.rs | 203 ++++++++++++++++++ rslib/src/import_export/package/colpkg/mod.rs | 6 + .../src/import_export/package/colpkg/tests.rs | 70 ++++++ rslib/src/import_export/package/meta.rs | 45 ++++ rslib/src/import_export/package/mod.rs | 11 + rslib/src/lib.rs | 1 + rslib/src/scheduler/answering/mod.rs | 1 + 24 files changed, 587 insertions(+), 321 deletions(-) create mode 100644 proto/anki/import_export.proto create mode 100644 rslib/src/backend/import_export.rs create mode 100644 rslib/src/import_export/mod.rs rename rslib/src/{collection/exporting.rs => import_export/package/colpkg/export.rs} (64%) create mode 100644 rslib/src/import_export/package/colpkg/import.rs create mode 100644 rslib/src/import_export/package/colpkg/mod.rs create mode 100644 rslib/src/import_export/package/colpkg/tests.rs create mode 100644 rslib/src/import_export/package/meta.rs create mode 100644 rslib/src/import_export/package/mod.rs diff --git a/proto/anki/backend.proto b/proto/anki/backend.proto index f2deb7d71..74675f4bd 100644 --- a/proto/anki/backend.proto +++ b/proto/anki/backend.proto @@ -25,6 +25,7 @@ enum ServiceIndex { SERVICE_INDEX_COLLECTION = 13; SERVICE_INDEX_CARDS = 14; SERVICE_INDEX_LINKS = 15; + SERVICE_INDEX_IMPORT_EXPORT = 16; } message BackendInit { diff --git a/proto/anki/collection.proto b/proto/anki/collection.proto index 7d55b9b60..f1552ccbd 100644 --- a/proto/anki/collection.proto +++ b/proto/anki/collection.proto @@ -10,7 +10,6 @@ import "anki/generic.proto"; service CollectionService { rpc OpenCollection(OpenCollectionRequest) returns (generic.Empty); rpc CloseCollection(CloseCollectionRequest) returns (generic.Empty); - rpc RestoreBackup(RestoreBackupRequest) returns (generic.String); rpc CheckDatabase(generic.Empty) returns (CheckDatabaseResponse); rpc GetUndoStatus(generic.Empty) returns (UndoStatus); rpc Undo(generic.Empty) returns (OpChangesAfterUndo); @@ -20,7 +19,6 @@ service CollectionService { rpc LatestProgress(generic.Empty) returns (Progress); rpc SetWantsAbort(generic.Empty) returns (generic.Empty); rpc AwaitBackupCompletion(generic.Empty) returns (generic.Empty); - rpc ExportCollection(ExportCollectionRequest) returns (generic.Empty); } message OpenCollectionRequest { @@ -39,12 +37,6 @@ message CloseCollectionRequest { optional uint64 minimum_backup_interval = 3; } -message RestoreBackupRequest { - string col_path = 1; - string backup_path = 2; - string media_folder = 3; -} - message CheckDatabaseResponse { repeated string problems = 1; } @@ -125,9 +117,3 @@ message Progress { uint32 exporting = 8; } } - -message ExportCollectionRequest { - string out_path = 1; - bool include_media = 2; - bool legacy = 3; -} diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto new file mode 100644 index 000000000..e21c7ffb4 --- /dev/null +++ b/proto/anki/import_export.proto @@ -0,0 +1,52 
@@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +syntax = "proto3"; + +package anki.import_export; + +import "anki/generic.proto"; + +service ImportExportService { + rpc ImportCollectionPackage(ImportCollectionPackageRequest) + returns (generic.Empty); + rpc ExportCollectionPackage(ExportCollectionPackageRequest) + returns (generic.Empty); +} + +message ImportCollectionPackageRequest { + string col_path = 1; + string backup_path = 2; + string media_folder = 3; +} + +message ExportCollectionPackageRequest { + string out_path = 1; + bool include_media = 2; + bool legacy = 3; +} + +message PackageMetadata { + enum Version { + VERSION_UNKNOWN = 0; + // When `meta` missing, and collection.anki2 file present. + VERSION_LEGACY_1 = 1; + // When `meta` missing, and collection.anki21 file present. + VERSION_LEGACY_2 = 2; + /// Implies MediaEntry media map, and zstd compression. + /// collection.21b file + VERSION_LATEST = 3; + } + + Version version = 1; +} + +message MediaEntries { + message MediaEntry { + string name = 1; + uint32 size = 2; + bytes sha1 = 3; + } + + repeated MediaEntry entries = 1; +} diff --git a/pylib/anki/_backend/genbackend.py b/pylib/anki/_backend/genbackend.py index 60b84558a..8f8f82eb7 100755 --- a/pylib/anki/_backend/genbackend.py +++ b/pylib/anki/_backend/genbackend.py @@ -25,6 +25,7 @@ import anki.stats_pb2 import anki.card_rendering_pb2 import anki.tags_pb2 import anki.media_pb2 +import anki.import_export_pb2 import stringcase @@ -184,6 +185,7 @@ service_modules = dict( TAGS=anki.tags_pb2, MEDIA=anki.media_pb2, LINKS=anki.links_pb2, + IMPORT_EXPORT=anki.import_export_pb2, ) for service in anki.backend_pb2.ServiceIndex.DESCRIPTOR.values: @@ -236,6 +238,7 @@ import anki.stats_pb2 import anki.card_rendering_pb2 import anki.tags_pb2 import anki.media_pb2 +import anki.import_export_pb2 class RustBackendGenerated: def _run_command(self, service: int, method: int, input: Any) -> bytes: diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index 2e7468129..65ed984b3 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -268,7 +268,7 @@ class Collection(DeprecatedNamesMixin): self, out_path: str, include_media: bool, legacy: bool ) -> None: self.close_for_full_sync() - self._backend.export_collection( + self._backend.export_collection_package( out_path=out_path, include_media=include_media, legacy=legacy ) diff --git a/qt/aqt/importing.py b/qt/aqt/importing.py index 7ed676f6e..7e5f65c64 100644 --- a/qt/aqt/importing.py +++ b/qt/aqt/importing.py @@ -479,10 +479,10 @@ def replace_with_apkg( mw.taskman.run_on_main(lambda: mw.progress.update(label=label)) - def do_import() -> str: + def do_import() -> None: col_path = mw.pm.collectionPath() media_folder = os.path.join(mw.pm.profileFolder(), "collection.media") - return mw.backend.restore_backup( + mw.backend.import_collection_package( col_path=col_path, backup_path=filename, media_folder=media_folder ) @@ -491,14 +491,12 @@ def replace_with_apkg( timer.deleteLater() try: - soft_error = future.result() + future.result() except Exception as error: if not isinstance(error, Interrupted): showWarning(str(error)) callback(False) else: - if soft_error: - showWarning(soft_error) callback(True) qconnect(timer.timeout, on_progress) diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml index 2dfd7fe5f..afd0198c7 100644 --- a/rslib/Cargo.toml +++ b/rslib/Cargo.toml @@ -98,5 +98,5 @@ tokio-util = { version = "0.6.8", features = ["io"] } 
pct-str = { git="https://github.com/timothee-haudebourg/pct-str.git", rev="4adccd8d4a222ab2672350a102f06ae832a0572d" } unic-ucd-category = "0.9.0" id_tree = "1.8.0" -zstd = { version="0.10.0+zstd.1.5.2", features=["zstdmt"] } +zstd = { version="0.10.0", features=["zstdmt"] } num_cpus = "1.13.1" diff --git a/rslib/src/backend/collection.rs b/rslib/src/backend/collection.rs index 80247e704..d354beac7 100644 --- a/rslib/src/backend/collection.rs +++ b/rslib/src/backend/collection.rs @@ -10,11 +10,7 @@ pub(super) use crate::backend_proto::collection_service::Service as CollectionSe use crate::{ backend::progress::progress_to_proto, backend_proto::{self as pb, preferences::Backups}, - collection::{ - backup::{self, ImportProgress}, - exporting::export_collection_file, - CollectionBuilder, - }, + collection::{backup, CollectionBuilder}, log::{self}, prelude::*, }; @@ -76,42 +72,6 @@ impl CollectionService for Backend { Ok(().into()) } - - fn export_collection(&self, input: pb::ExportCollectionRequest) -> Result { - self.abort_media_sync_and_wait(); - - let mut guard = self.lock_open_collection()?; - - let col_inner = guard.take().unwrap(); - let col_path = col_inner.col_path.clone(); - let media_dir = input.include_media.then(|| col_inner.media_folder.clone()); - - col_inner.close(true)?; - - export_collection_file( - input.out_path, - col_path, - media_dir, - input.legacy, - &self.tr, - self.export_progress_fn(), - ) - .map(Into::into) - } - - fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result { - let _guard = self.lock_closed_collection()?; - - backup::restore_backup( - self.import_progress_fn(), - &input.col_path, - &input.backup_path, - &input.media_folder, - &self.tr, - ) - .map(Into::into) - } - fn check_database(&self, _input: pb::Empty) -> Result { let mut handler = self.new_progress_handler(); let progress_fn = move |progress, throttle| { @@ -154,7 +114,7 @@ impl CollectionService for Backend { } impl Backend { - fn lock_open_collection(&self) -> Result>> { + pub(super) fn lock_open_collection(&self) -> Result>> { let guard = self.col.lock().unwrap(); guard .is_some() @@ -162,7 +122,7 @@ impl Backend { .ok_or(AnkiError::CollectionNotOpen) } - fn lock_closed_collection(&self) -> Result>> { + pub(super) fn lock_closed_collection(&self) -> Result>> { let guard = self.col.lock().unwrap(); guard .is_none() @@ -195,23 +155,4 @@ impl Backend { Ok(()) } - - fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> { - let mut handler = self.new_progress_handler(); - move |progress| { - let throttle = matches!(progress, ImportProgress::Media(_)); - if handler.update(Progress::Import(progress), throttle) { - Ok(()) - } else { - Err(AnkiError::Interrupted) - } - } - } - - fn export_progress_fn(&self) -> impl FnMut(usize) { - let mut handler = self.new_progress_handler(); - move |media_files| { - handler.update(Progress::Export(media_files), true); - } - } } diff --git a/rslib/src/backend/import_export.rs b/rslib/src/backend/import_export.rs new file mode 100644 index 000000000..47d99a456 --- /dev/null +++ b/rslib/src/backend/import_export.rs @@ -0,0 +1,67 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use super::{progress::Progress, Backend}; +pub(super) use crate::backend_proto::importexport_service::Service as ImportExportService; +use crate::{ + backend_proto::{self as pb}, + import_export::{package::import_colpkg, ImportProgress}, + prelude::*, +}; + +impl 
ImportExportService for Backend { + fn export_collection_package( + &self, + input: pb::ExportCollectionPackageRequest, + ) -> Result { + self.abort_media_sync_and_wait(); + + let mut guard = self.lock_open_collection()?; + + let col_inner = guard.take().unwrap(); + col_inner + .export_colpkg( + input.out_path, + input.include_media, + input.legacy, + self.export_progress_fn(), + ) + .map(Into::into) + } + + fn import_collection_package( + &self, + input: pb::ImportCollectionPackageRequest, + ) -> Result { + let _guard = self.lock_closed_collection()?; + + import_colpkg( + &input.backup_path, + &input.col_path, + &input.media_folder, + self.import_progress_fn(), + ) + .map(Into::into) + } +} + +impl Backend { + fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> { + let mut handler = self.new_progress_handler(); + move |progress| { + let throttle = matches!(progress, ImportProgress::Media(_)); + if handler.update(Progress::Import(progress), throttle) { + Ok(()) + } else { + Err(AnkiError::Interrupted) + } + } + } + + fn export_progress_fn(&self) -> impl FnMut(usize) { + let mut handler = self.new_progress_handler(); + move |media_files| { + handler.update(Progress::Export(media_files), true); + } + } +} diff --git a/rslib/src/backend/mod.rs b/rslib/src/backend/mod.rs index 8cfa8dc69..ca1805ec8 100644 --- a/rslib/src/backend/mod.rs +++ b/rslib/src/backend/mod.rs @@ -15,6 +15,7 @@ mod decks; mod error; mod generic; mod i18n; +mod import_export; mod links; mod media; mod notes; @@ -47,6 +48,7 @@ use self::{ deckconfig::DeckConfigService, decks::DecksService, i18n::I18nService, + import_export::ImportExportService, links::LinksService, media::MediaService, notes::NotesService, @@ -145,6 +147,9 @@ impl Backend { pb::ServiceIndex::Links => LinksService::run_method(self, method, input), pb::ServiceIndex::Collection => CollectionService::run_method(self, method, input), pb::ServiceIndex::Cards => CardsService::run_method(self, method, input), + pb::ServiceIndex::ImportExport => { + ImportExportService::run_method(self, method, input) + } }) .map_err(|err| { let backend_err = err.into_protobuf(&self.tr); diff --git a/rslib/src/backend/progress.rs b/rslib/src/backend/progress.rs index ea88c1c29..a21bede23 100644 --- a/rslib/src/backend/progress.rs +++ b/rslib/src/backend/progress.rs @@ -8,9 +8,9 @@ use futures::future::AbortHandle; use super::Backend; use crate::{ backend_proto as pb, - collection::backup::ImportProgress, dbcheck::DatabaseCheckProgress, i18n::I18n, + import_export::ImportProgress, media::sync::MediaSyncProgress, sync::{FullSyncProgress, NormalSyncProgress, SyncStage}, }; diff --git a/rslib/src/backend_proto.rs b/rslib/src/backend_proto.rs index ec9f2da43..f3b6eff71 100644 --- a/rslib/src/backend_proto.rs +++ b/rslib/src/backend_proto.rs @@ -22,6 +22,7 @@ protobuf!(deckconfig); protobuf!(decks); protobuf!(generic); protobuf!(i18n); +protobuf!(import_export); protobuf!(links); protobuf!(media); protobuf!(notes); diff --git a/rslib/src/collection/backup.rs b/rslib/src/collection/backup.rs index fb46f7701..40126ab9c 100644 --- a/rslib/src/collection/backup.rs +++ b/rslib/src/collection/backup.rs @@ -2,10 +2,8 @@ // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use std::{ - collections::HashMap, ffi::OsStr, - fs::{self, read_dir, remove_file, DirEntry, File}, - io::{self, Write}, + fs::{read_dir, remove_file, DirEntry}, path::{Path, PathBuf}, thread::{self, JoinHandle}, time::SystemTime, @@ -14,32 +12,16 @@ use std::{ use 
chrono::prelude::*; use itertools::Itertools; use log::error; -use tempfile::NamedTempFile; -use zip::ZipArchive; -use zstd::{self, stream::copy_decode}; use crate::{ - backend_proto::preferences::Backups, - collection::{ - exporting::{export_collection_data, Meta, PACKAGE_VERSION}, - CollectionBuilder, - }, - error::ImportError, - log, + backend_proto::preferences::Backups, import_export::package::export_colpkg_from_data, log, prelude::*, - text::normalize_to_nfc, }; const BACKUP_FORMAT_STRING: &str = "backup-%Y-%m-%d-%H.%M.%S.colpkg"; /// Default seconds after a backup, in which further backups will be skipped. const MINIMUM_BACKUP_INTERVAL: u64 = 5 * 60; -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum ImportProgress { - Collection, - Media(usize), -} - pub fn backup( col_path: impl AsRef, backup_folder: impl AsRef + Send + 'static, @@ -69,45 +51,6 @@ fn has_recent_backup(backup_folder: &Path, recent_secs: u64) -> Result { .any(|duration| duration.as_secs() < recent_secs)) } -pub fn restore_backup( - mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, - col_path: &str, - backup_path: &str, - media_folder: &str, - tr: &I18n, -) -> Result { - progress_fn(ImportProgress::Collection)?; - let col_path = PathBuf::from(col_path); - let col_dir = col_path - .parent() - .ok_or_else(|| AnkiError::invalid_input("bad collection path"))?; - let mut tempfile = NamedTempFile::new_in(col_dir)?; - - let backup_file = File::open(backup_path)?; - let mut archive = ZipArchive::new(backup_file)?; - let meta = Meta::from_archive(&mut archive)?; - - copy_collection(&mut archive, &mut tempfile, meta)?; - progress_fn(ImportProgress::Collection)?; - check_collection(tempfile.path())?; - progress_fn(ImportProgress::Collection)?; - - let mut result = String::new(); - if let Err(e) = restore_media(meta, progress_fn, &mut archive, media_folder) { - result = tr - .importing_failed_to_import_media_file(e.localized_description(tr)) - .into_owned() - }; - - tempfile.as_file().sync_all()?; - tempfile.persist(&col_path).map_err(|err| err.error)?; - if !cfg!(windows) { - File::open(col_dir)?.sync_all()?; - } - - Ok(result) -} - fn backup_inner>( col_data: &[u8], backup_folder: P, @@ -126,7 +69,7 @@ fn backup_inner>( fn write_backup>(col_data: &[u8], backup_folder: S, tr: &I18n) -> Result<()> { let out_path = Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING))); - export_collection_data(&out_path, col_data, tr) + export_colpkg_from_data(&out_path, col_data, tr) } fn thin_backups>(backup_folder: P, limits: Backups, log: &Logger) -> Result<()> { @@ -281,113 +224,6 @@ impl BackupFilter { } } -impl Meta { - /// Extracts meta data from an archive and checks if its version is supported. 
- fn from_archive(archive: &mut ZipArchive) -> Result { - let mut meta: Self = archive - .by_name("meta") - .ok() - .and_then(|file| serde_json::from_reader(file).ok()) - .unwrap_or_default(); - if meta.version > PACKAGE_VERSION { - return Err(AnkiError::ImportError(ImportError::TooNew)); - } else if meta.version == 0 { - meta.version = if archive.by_name("collection.anki21").is_ok() { - 2 - } else { - 1 - }; - } - - Ok(meta) - } -} - -fn check_collection(col_path: &Path) -> Result<()> { - CollectionBuilder::new(col_path) - .build() - .ok() - .and_then(|col| { - col.storage - .db - .pragma_query_value(None, "integrity_check", |row| row.get::<_, String>(0)) - .ok() - }) - .and_then(|s| (s == "ok").then(|| ())) - .ok_or(AnkiError::ImportError(ImportError::Corrupt)) -} - -fn restore_media( - meta: Meta, - mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, - archive: &mut ZipArchive, - media_folder: &str, -) -> Result<()> { - let media_file_names = extract_media_file_names(meta, archive)?; - let mut count = 0; - - for (archive_file_name, file_name) in media_file_names.iter().enumerate() { - count += 1; - if count % 10 == 0 { - progress_fn(ImportProgress::Media(count))?; - } - - if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) { - let file_path = Path::new(&media_folder).join(normalize_to_nfc(file_name).as_ref()); - let files_are_equal = fs::metadata(&file_path) - .map(|metadata| metadata.len() == zip_file.size()) - .unwrap_or_default(); - if !files_are_equal { - let mut file = match File::create(&file_path) { - Ok(file) => file, - Err(err) => return Err(AnkiError::file_io_error(err, &file_path)), - }; - if let Err(err) = io::copy(&mut zip_file, &mut file) { - return Err(AnkiError::file_io_error(err, &file_path)); - } - } - } else { - return Err(AnkiError::invalid_input(&format!( - "{archive_file_name} missing from archive" - ))); - } - } - Ok(()) -} - -fn extract_media_file_names(meta: Meta, archive: &mut ZipArchive) -> Result> { - let mut file = archive.by_name("media")?; - let mut buf = Vec::new(); - if meta.zstd_compressed() { - copy_decode(file, &mut buf)?; - } else { - io::copy(&mut file, &mut buf)?; - } - if meta.media_list_is_hashmap() { - let map: HashMap<&str, String> = serde_json::from_slice(&buf)?; - Ok(map.into_iter().map(|(_k, v)| v).collect()) - } else { - serde_json::from_slice(&buf).map_err(Into::into) - } -} - -fn copy_collection( - archive: &mut ZipArchive, - writer: &mut impl Write, - meta: Meta, -) -> Result<()> { - let mut file = archive - .by_name(meta.collection_name()) - .map_err(|_| AnkiError::ImportError(ImportError::Corrupt))?; - if !meta.zstd_compressed() { - io::copy(&mut file, writer)?; - } else { - copy_decode(file, writer)?; - } - - Ok(()) -} - #[cfg(test)] mod test { use super::*; diff --git a/rslib/src/collection/mod.rs b/rslib/src/collection/mod.rs index 27e3aabb5..b5d8fa4f2 100644 --- a/rslib/src/collection/mod.rs +++ b/rslib/src/collection/mod.rs @@ -2,7 +2,6 @@ // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html pub mod backup; -pub mod exporting; pub(crate) mod timestamps; mod transact; pub(crate) mod undo; diff --git a/rslib/src/error/mod.rs b/rslib/src/error/mod.rs index ae20ad9f8..27a17cdb0 100644 --- a/rslib/src/error/mod.rs +++ b/rslib/src/error/mod.rs @@ -183,17 +183,19 @@ pub enum TemplateSaveErrorDetails { ExtraneousCloze, } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone)] pub enum ImportError { Corrupt, TooNew, + MediaImportFailed(String), } impl ImportError 
{ - fn localized_description(self, tr: &I18n) -> String { + fn localized_description(&self, tr: &I18n) -> String { match self { - Self::Corrupt => tr.importing_the_provided_file_is_not_a(), - Self::TooNew => tr.errors_collection_too_new(), + ImportError::Corrupt => tr.importing_the_provided_file_is_not_a(), + ImportError::TooNew => tr.errors_collection_too_new(), + ImportError::MediaImportFailed(err) => tr.importing_failed_to_import_media_file(err), } .into() } diff --git a/rslib/src/import_export/mod.rs b/rslib/src/import_export/mod.rs new file mode 100644 index 000000000..994d93101 --- /dev/null +++ b/rslib/src/import_export/mod.rs @@ -0,0 +1,10 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +pub mod package; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ImportProgress { + Collection, + Media(usize), +} diff --git a/rslib/src/collection/exporting.rs b/rslib/src/import_export/package/colpkg/export.rs similarity index 64% rename from rslib/src/collection/exporting.rs rename to rslib/src/import_export/package/colpkg/export.rs index 9b1a6a133..491c63df8 100644 --- a/rslib/src/collection/exporting.rs +++ b/rslib/src/import_export/package/colpkg/export.rs @@ -8,7 +8,7 @@ use std::{ path::{Path, PathBuf}, }; -use serde_derive::{Deserialize, Serialize}; +use prost::Message; use tempfile::NamedTempFile; use zip::{write::FileOptions, CompressionMethod, ZipWriter}; use zstd::{ @@ -16,54 +16,49 @@ use zstd::{ Encoder, }; -use crate::{collection::CollectionBuilder, prelude::*, text::normalize_to_nfc}; +use super::super::{MediaEntries, MediaEntry, Meta, Version}; +use crate::{ + collection::CollectionBuilder, media::files::sha1_of_data, prelude::*, text::normalize_to_nfc, +}; -/// Bump if making changes that break restoring on older releases. -pub const PACKAGE_VERSION: u8 = 3; -const COLLECTION_NAME: &str = "collection.anki21b"; -const COLLECTION_NAME_V1: &str = "collection.anki2"; -const COLLECTION_NAME_V2: &str = "collection.anki21"; /// Enable multithreaded compression if over this size. For smaller files, /// multithreading makes things slower, and in initial tests, the crossover /// point was somewhere between 1MB and 10MB on a many-core system. const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024; -#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)] -#[serde(default)] -pub(super) struct Meta { - #[serde(rename = "ver")] - pub(super) version: u8, -} - -impl Meta { - pub(super) fn new() -> Self { - Self { - version: PACKAGE_VERSION, - } - } - - pub(super) fn new_v2() -> Self { - Self { version: 2 } - } - - pub(super) fn collection_name(&self) -> &'static str { - match self.version { - 1 => COLLECTION_NAME_V1, - 2 => COLLECTION_NAME_V2, - _ => COLLECTION_NAME, - } - } - - pub(super) fn zstd_compressed(&self) -> bool { - self.version >= 3 - } - - pub(super) fn media_list_is_hashmap(&self) -> bool { - self.version < 3 +impl Collection { + pub fn export_colpkg( + self, + out_path: impl AsRef, + include_media: bool, + legacy: bool, + progress_fn: impl FnMut(usize), + ) -> Result<()> { + let colpkg_name = out_path.as_ref(); + let src_path = self.col_path.clone(); + let src_media_folder = if include_media { + Some(self.media_folder.clone()) + } else { + None + }; + let tr = self.tr.clone(); + // FIXME: downgrade on v3 export is superfluous at current schema version. 
We don't + // want things to break when the schema is bumped in the future, so perhaps the + // exporting code should be downgrading to 18 instead of 11 (which will probably require + // changing the boolean to an enum). + self.close(true)?; + export_collection_file( + &colpkg_name, + &src_path, + src_media_folder, + legacy, + &tr, + progress_fn, + ) } } -pub fn export_collection_file( +fn export_collection_file( out_path: impl AsRef, col_path: impl AsRef, media_dir: Option, @@ -71,7 +66,11 @@ pub fn export_collection_file( tr: &I18n, progress_fn: impl FnMut(usize), ) -> Result<()> { - let meta = if legacy { Meta::new_v2() } else { Meta::new() }; + let meta = if legacy { + Meta::new_legacy() + } else { + Meta::new() + }; let mut col_file = File::open(col_path)?; let col_size = col_file.metadata()?.len() as usize; export_collection( @@ -85,7 +84,8 @@ pub fn export_collection_file( ) } -pub(crate) fn export_collection_data( +/// Write copied collection data without any media. +pub(crate) fn export_colpkg_from_data( out_path: impl AsRef, mut col_data: &[u8], tr: &I18n, @@ -115,10 +115,12 @@ fn export_collection( let mut zip = ZipWriter::new(out_file); zip.start_file("meta", file_options_stored())?; - zip.write_all(serde_json::to_string(&meta).unwrap().as_bytes())?; - write_collection(meta, &mut zip, col, col_size)?; + let mut meta_bytes = vec![]; + meta.encode(&mut meta_bytes)?; + zip.write_all(&meta_bytes)?; + write_collection(&meta, &mut zip, col, col_size)?; write_dummy_collection(&mut zip, tr)?; - write_media(meta, &mut zip, media_dir, progress_fn)?; + write_media(&meta, &mut zip, media_dir, progress_fn)?; zip.finish()?; Ok(()) @@ -129,16 +131,16 @@ fn file_options_stored() -> FileOptions { } fn write_collection( - meta: Meta, + meta: &Meta, zip: &mut ZipWriter, col: &mut impl Read, size: usize, ) -> Result<()> { if meta.zstd_compressed() { - zip.start_file(meta.collection_name(), file_options_stored())?; + zip.start_file(meta.collection_filename(), file_options_stored())?; zstd_copy(col, zip, size)?; } else { - zip.start_file(meta.collection_name(), FileOptions::default())?; + zip.start_file(meta.collection_filename(), FileOptions::default())?; io::copy(col, zip)?; } Ok(()) @@ -146,7 +148,10 @@ fn write_collection( fn write_dummy_collection(zip: &mut ZipWriter, tr: &I18n) -> Result<()> { let mut tempfile = create_dummy_collection_file(tr)?; - zip.start_file(COLLECTION_NAME_V1, file_options_stored())?; + zip.start_file( + Version::Legacy1.collection_filename(), + file_options_stored(), + )?; io::copy(&mut tempfile, zip)?; Ok(()) @@ -187,36 +192,45 @@ fn zstd_copy(reader: &mut impl Read, writer: &mut impl Write, size: usize) -> Re } fn write_media( - meta: Meta, + meta: &Meta, zip: &mut ZipWriter, media_dir: Option, progress_fn: impl FnMut(usize), ) -> Result<()> { - let mut media_names = vec![]; + let mut media_entries = vec![]; if let Some(media_dir) = media_dir { - write_media_files(meta, zip, &media_dir, &mut media_names, progress_fn)?; + write_media_files(meta, zip, &media_dir, &mut media_entries, progress_fn)?; } - write_media_map(meta, &media_names, zip)?; + write_media_map(meta, media_entries, zip)?; Ok(()) } -fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter) -> Result<()> { +fn write_media_map( + meta: &Meta, + media_entries: Vec, + zip: &mut ZipWriter, +) -> Result<()> { zip.start_file("media", file_options_stored())?; - let json_bytes = if meta.media_list_is_hashmap() { - let map: HashMap = media_names + let encoded_bytes = if 
meta.media_list_is_hashmap() { + let map: HashMap = media_entries .iter() .enumerate() - .map(|(k, v)| (k.to_string(), v.as_str())) + .map(|(k, entry)| (k.to_string(), entry.name.as_str())) .collect(); serde_json::to_vec(&map)? } else { - serde_json::to_vec(media_names)? + let mut buf = vec![]; + MediaEntries { + entries: media_entries, + } + .encode(&mut buf)?; + buf }; - let size = json_bytes.len(); - let mut cursor = std::io::Cursor::new(json_bytes); + let size = encoded_bytes.len(); + let mut cursor = std::io::Cursor::new(encoded_bytes); if meta.zstd_compressed() { zstd_copy(&mut cursor, zip, size)?; } else { @@ -226,10 +240,10 @@ fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter } fn write_media_files( - meta: Meta, + meta: &Meta, zip: &mut ZipWriter, dir: &Path, - names: &mut Vec, + media_entries: &mut Vec, mut progress_fn: impl FnMut(usize), ) -> Result<()> { let mut writer = MediaFileWriter::new(meta); @@ -240,9 +254,15 @@ fn write_media_files( continue; } progress_fn(index); - names.push(normalized_unicode_file_name(&entry)?); + zip.start_file(index.to_string(), file_options_stored())?; - writer = writer.write(&mut File::open(entry.path())?, zip)?; + + let name = normalized_unicode_file_name(&entry)?; + // FIXME: we should chunk this + let data = std::fs::read(entry.path())?; + let media_entry = make_media_entry(&data, name); + writer = writer.write(&mut std::io::Cursor::new(data), zip)?; + media_entries.push(media_entry); // can't enumerate(), as we skip folders index += 1; } @@ -250,6 +270,14 @@ fn write_media_files( Ok(()) } +fn make_media_entry(data: &[u8], name: String) -> MediaEntry { + MediaEntry { + name, + size: data.len() as u32, + sha1: sha1_of_data(data).to_vec(), + } +} + fn normalized_unicode_file_name(entry: &DirEntry) -> Result { entry .file_name() @@ -268,7 +296,7 @@ fn normalized_unicode_file_name(entry: &DirEntry) -> Result { struct MediaFileWriter(Option>); impl MediaFileWriter { - fn new(meta: Meta) -> Self { + fn new(meta: &Meta) -> Self { Self( meta.zstd_compressed() .then(|| RawEncoder::with_dictionary(0, &[]).unwrap()), diff --git a/rslib/src/import_export/package/colpkg/import.rs b/rslib/src/import_export/package/colpkg/import.rs new file mode 100644 index 000000000..03229bca8 --- /dev/null +++ b/rslib/src/import_export/package/colpkg/import.rs @@ -0,0 +1,203 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::{ + collections::HashMap, + fs::{self, File}, + io::{self, Read, Write}, + path::{Path, PathBuf}, +}; + +use prost::Message; +use tempfile::NamedTempFile; +use zip::ZipArchive; +use zstd::{self, stream::copy_decode}; + +use super::super::Version; +use crate::{ + collection::CollectionBuilder, + error::ImportError, + import_export::{ + package::{MediaEntries, MediaEntry, Meta}, + ImportProgress, + }, + prelude::*, + text::normalize_to_nfc, +}; + +impl Meta { + /// Extracts meta data from an archive and checks if its version is supported. 
+ pub(super) fn from_archive(archive: &mut ZipArchive) -> Result { + let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| { + let mut buf = vec![]; + meta_file.read_to_end(&mut buf).ok()?; + Some(buf) + }); + let meta = if let Some(bytes) = meta_bytes { + let meta: Meta = Message::decode(&*bytes)?; + if meta.version() == Version::Unknown { + return Err(AnkiError::ImportError(ImportError::TooNew)); + } + meta + } else { + Meta { + version: if archive.by_name("collection.anki21").is_ok() { + Version::Legacy2 + } else { + Version::Legacy1 + } as i32, + } + }; + Ok(meta) + } +} + +pub fn import_colpkg( + colpkg_path: &str, + target_col_path: &str, + target_media_folder: &str, + mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, +) -> Result<()> { + progress_fn(ImportProgress::Collection)?; + let col_path = PathBuf::from(target_col_path); + let col_dir = col_path + .parent() + .ok_or_else(|| AnkiError::invalid_input("bad collection path"))?; + let mut tempfile = NamedTempFile::new_in(col_dir)?; + + let backup_file = File::open(colpkg_path)?; + let mut archive = ZipArchive::new(backup_file)?; + let meta = Meta::from_archive(&mut archive)?; + + copy_collection(&mut archive, &mut tempfile, &meta)?; + progress_fn(ImportProgress::Collection)?; + check_collection(tempfile.path())?; + progress_fn(ImportProgress::Collection)?; + + let media_import_result = restore_media(&meta, progress_fn, &mut archive, target_media_folder); + + // Proceed with replacing collection, regardless of media import result + tempfile.as_file().sync_all()?; + tempfile.persist(&col_path).map_err(|err| err.error)?; + if !cfg!(windows) { + File::open(col_dir)?.sync_all()?; + } + + media_import_result +} + +fn check_collection(col_path: &Path) -> Result<()> { + CollectionBuilder::new(col_path) + .build() + .ok() + .and_then(|col| { + col.storage + .db + .pragma_query_value(None, "integrity_check", |row| row.get::<_, String>(0)) + .ok() + }) + .and_then(|s| (s == "ok").then(|| ())) + .ok_or(AnkiError::ImportError(ImportError::Corrupt)) +} + +fn restore_media( + meta: &Meta, + mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, + archive: &mut ZipArchive, + media_folder: &str, +) -> Result<()> { + let media_entries = extract_media_entries(meta, archive)?; + std::fs::create_dir_all(media_folder)?; + let mut count = 0; + + for (archive_file_name, entry) in media_entries.iter().enumerate() { + count += 1; + if count % 10 == 0 { + progress_fn(ImportProgress::Media(count))?; + } + + if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) { + let file_path = Path::new(&media_folder).join(normalize_to_nfc(&entry.name).as_ref()); + let size_in_colpkg = if meta.media_list_is_hashmap() { + zip_file.size() + } else { + entry.size as u64 + }; + let files_are_equal = fs::metadata(&file_path) + .map(|metadata| metadata.len() == size_in_colpkg) + .unwrap_or_default(); + if !files_are_equal { + // FIXME: write to temp file and atomic rename + let mut file = match File::create(&file_path) { + Ok(file) => file, + Err(err) => return Err(AnkiError::file_io_error(err, &file_path)), + }; + if meta.zstd_compressed() { + copy_decode(&mut zip_file, &mut file) + } else { + io::copy(&mut zip_file, &mut file).map(|_| ()) + } + .map_err(|err| AnkiError::file_io_error(err, &file_path))?; + } + } else { + return Err(AnkiError::invalid_input(&format!( + "{archive_file_name} missing from archive" + ))); + } + } + Ok(()) +} + +fn extract_media_entries(meta: &Meta, archive: &mut ZipArchive) -> Result> { + let mut 
file = archive.by_name("media")?; + let mut buf = Vec::new(); + if meta.zstd_compressed() { + copy_decode(file, &mut buf)?; + } else { + io::copy(&mut file, &mut buf)?; + } + if meta.media_list_is_hashmap() { + let map: HashMap<&str, String> = serde_json::from_slice(&buf)?; + let mut entries: Vec<(usize, String)> = map + .into_iter() + .map(|(k, v)| (k.parse().unwrap_or_default(), v)) + .collect(); + entries.sort_unstable(); + // any gaps in the file numbers would lead to media being imported under the wrong name + if entries + .iter() + .enumerate() + .any(|(idx1, (idx2, _))| idx1 != *idx2) + { + return Err(AnkiError::ImportError(ImportError::Corrupt)); + } + Ok(entries + .into_iter() + .map(|(_str_idx, name)| MediaEntry { + name, + size: 0, + sha1: vec![], + }) + .collect()) + } else { + let entries: MediaEntries = Message::decode(&*buf)?; + Ok(entries.entries) + } +} + +fn copy_collection( + archive: &mut ZipArchive, + writer: &mut impl Write, + meta: &Meta, +) -> Result<()> { + let mut file = archive + .by_name(meta.collection_filename()) + .map_err(|_| AnkiError::ImportError(ImportError::Corrupt))?; + if !meta.zstd_compressed() { + io::copy(&mut file, writer)?; + } else { + copy_decode(file, writer)?; + } + + Ok(()) +} diff --git a/rslib/src/import_export/package/colpkg/mod.rs b/rslib/src/import_export/package/colpkg/mod.rs new file mode 100644 index 000000000..5416047de --- /dev/null +++ b/rslib/src/import_export/package/colpkg/mod.rs @@ -0,0 +1,6 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +pub(super) mod export; +pub(super) mod import; +mod tests; diff --git a/rslib/src/import_export/package/colpkg/tests.rs b/rslib/src/import_export/package/colpkg/tests.rs new file mode 100644 index 000000000..50cec595a --- /dev/null +++ b/rslib/src/import_export/package/colpkg/tests.rs @@ -0,0 +1,70 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +#![cfg(test)] + +use std::path::Path; + +use tempfile::tempdir; + +use crate::{ + collection::CollectionBuilder, import_export::package::import_colpkg, media::MediaManager, + prelude::*, +}; + +fn collection_with_media(dir: &Path, name: &str) -> Result { + let name = format!("{name}_src"); + let media_folder = dir.join(format!("{name}.media")); + std::fs::create_dir(&media_folder)?; + // add collection with sentinel note + let mut col = CollectionBuilder::new(dir.join(format!("{name}.anki2"))) + .set_media_paths(media_folder, dir.join(format!("{name}.mdb"))) + .build()?; + let nt = col.get_notetype_by_name("Basic")?.unwrap(); + let mut note = nt.new_note(); + col.add_note(&mut note, DeckId(1))?; + // add sample media + let mgr = MediaManager::new(&col.media_folder, &col.media_db)?; + let mut ctx = mgr.dbctx(); + mgr.add_file(&mut ctx, "1", b"1")?; + mgr.add_file(&mut ctx, "2", b"2")?; + mgr.add_file(&mut ctx, "3", b"3")?; + Ok(col) +} + +#[test] +fn roundtrip() -> Result<()> { + let _dir = tempdir()?; + let dir = _dir.path(); + + for (legacy, name) in [(true, "legacy"), (false, "v3")] { + // export to a file + let col = collection_with_media(dir, name)?; + let colpkg_name = dir.join(format!("{name}.colpkg")); + col.export_colpkg(&colpkg_name, true, legacy, |_| ())?; + // import into a new collection + let anki2_name = dir + .join(format!("{name}.anki2")) + .to_string_lossy() + .into_owned(); + let import_media_dir = dir.join(format!("{name}.media")); + import_colpkg( + 
&colpkg_name.to_string_lossy(), + &anki2_name, + import_media_dir.to_str().unwrap(), + |_| Ok(()), + )?; + // confirm collection imported + let col = CollectionBuilder::new(&anki2_name).build()?; + assert_eq!( + col.storage.db_scalar::("select count() from notes")?, + 1 + ); + // confirm media imported correctly + assert_eq!(std::fs::read(import_media_dir.join("1"))?, b"1"); + assert_eq!(std::fs::read(import_media_dir.join("2"))?, b"2"); + assert_eq!(std::fs::read(import_media_dir.join("3"))?, b"3"); + } + + Ok(()) +} diff --git a/rslib/src/import_export/package/meta.rs b/rslib/src/import_export/package/meta.rs new file mode 100644 index 000000000..df164918c --- /dev/null +++ b/rslib/src/import_export/package/meta.rs @@ -0,0 +1,45 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta}; + +impl Version { + pub(super) fn collection_filename(&self) -> &'static str { + match self { + Version::Unknown => unreachable!(), + Version::Legacy1 => "collection.anki2", + Version::Legacy2 => "collection.anki21", + Version::Latest => "collection.anki21b", + } + } +} + +impl Meta { + pub(super) fn new() -> Self { + Self { + version: Version::Latest as i32, + } + } + + pub(super) fn new_legacy() -> Self { + Self { + version: Version::Legacy2 as i32, + } + } + + pub(super) fn collection_filename(&self) -> &'static str { + self.version().collection_filename() + } + + pub(super) fn zstd_compressed(&self) -> bool { + !self.is_legacy() + } + + pub(super) fn media_list_is_hashmap(&self) -> bool { + self.is_legacy() + } + + fn is_legacy(&self) -> bool { + matches!(self.version(), Version::Legacy1 | Version::Legacy2) + } +} diff --git a/rslib/src/import_export/package/mod.rs b/rslib/src/import_export/package/mod.rs new file mode 100644 index 000000000..66d3ca14e --- /dev/null +++ b/rslib/src/import_export/package/mod.rs @@ -0,0 +1,11 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +mod colpkg; +mod meta; + +pub(crate) use colpkg::export::export_colpkg_from_data; +pub use colpkg::import::import_colpkg; +pub(self) use meta::{Meta, Version}; + +pub(self) use crate::backend_proto::{media_entries::MediaEntry, MediaEntries}; diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs index 8a2a46115..d911c3d3f 100644 --- a/rslib/src/lib.rs +++ b/rslib/src/lib.rs @@ -18,6 +18,7 @@ pub mod decks; pub mod error; pub mod findreplace; pub mod i18n; +pub mod import_export; pub mod latex; pub mod links; pub mod log; diff --git a/rslib/src/scheduler/answering/mod.rs b/rslib/src/scheduler/answering/mod.rs index 04924a365..2eeb66252 100644 --- a/rslib/src/scheduler/answering/mod.rs +++ b/rslib/src/scheduler/answering/mod.rs @@ -590,6 +590,7 @@ mod test { }}; } + // FIXME: This fails between 3:50-4:00 GMT #[test] fn new_limited_by_reviews() -> Result<()> { let (mut col, cids) = v3_test_collection(4)?;