Colpkg fixes (#1722)

* Fix legacy colpkg import; disable v3 import/export; add roundtrip test

The test revealed we weren't decompressing the media files on v3
import. That's easy to fix, but it means all files need decompressing
even when they already exist, which is not ideal; it would be better
to store size/checksum in the metadata instead.
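
A minimal sketch of that idea (hypothetical helper; the real check added
later in this PR compares the on-disk size against the size recorded in
the package metadata):

    // Sketch only: extract a file from the colpkg when the copy on disk
    // doesn't match the recorded size; a checksum check could be added later.
    fn needs_extraction(file_path: &std::path::Path, size_in_colpkg: u64) -> bool {
        std::fs::metadata(file_path)
            .map(|metadata| metadata.len() != size_in_colpkg)
            .unwrap_or(true)
    }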

* Switch media and meta to protobuf; re-enable v3 import/export

- Fixed media not being decompressed on import
- The uncompressed size and checksum are now included for each media
entry, so that we can quickly check if a given file needs to be extracted.
We're still just doing a naive size comparison on colpkg import at the
moment, but we may want to use a checksum in the future, and will need
a checksum for apkg imports (a short sketch follows after this list).
- Checksums can't be efficiently encoded in JSON, so the media list
has been switched to protobuf to reduce the space requirements.
- The meta file has been switched to protobuf as well, for consistency.
This will mean any colpkg files exported with beta7 will be
unreadable.
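
As a rough illustration, each media entry is now built from the file's raw
bytes; this mirrors the make_media_entry() helper and the MediaEntries /
MediaEntry protobuf messages added further down in this diff:

    fn make_media_entry(data: &[u8], name: String) -> MediaEntry {
        MediaEntry {
            name,
            size: data.len() as u32,
            sha1: sha1_of_data(data).to_vec(),
        }
    }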

* Avoid integer version comparisons

* Re-enable v3 test

* Apply suggestions from code review

Co-authored-by: RumovZ <gp5glkw78@relay.firefox.com>

* Add export_colpkg() method to Collection

More discoverable, and easier to call from unit tests
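
For example, the roundtrip test added later in this PR drives it roughly
like this (export_colpkg() consumes the collection, closing it first):

    // export to a .colpkg, including media, in the new (non-legacy) format,
    // ignoring progress updates
    col.export_colpkg(&colpkg_name, true, false, |_| ())?;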

* Split import/export code out into separate folders

Currently colpkg/*.rs contain some routines that will be useful for
apkg import/export as well; in the future we can refactor them into a
separate file in the parent module.

* Return a proper error when media import fails

This tripped me up when writing the earlier unit test - I had called
the equivalent of import_colpkg()?, and it was returning a string error
that I didn't notice. In practice this should result in the same text
being shown in the UI, but just skips the tooltip.
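
Concretely, a failed media import now surfaces as a dedicated error variant
with a localized description, as added to rslib's ImportError further down
in this diff:

    pub enum ImportError {
        Corrupt,
        TooNew,
        MediaImportFailed(String),
    }

    // localized_description() renders the new variant via
    // tr.importing_failed_to_import_media_file(err)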

* Automatically create media folder on import

* Move roundtrip test into separate file; check collection too

* Remove zstd version suffix

Prevents a warning shown each time Rust Analyzer is used to check the
code.

Co-authored-by: RumovZ <gp5glkw78@relay.firefox.com>
Damien Elmes 2022-03-17 15:11:23 +10:00 committed by GitHub
parent 2d00b6659f
commit c2e8d89fc6
24 changed files with 587 additions and 321 deletions


@@ -25,6 +25,7 @@ enum ServiceIndex {
   SERVICE_INDEX_COLLECTION = 13;
   SERVICE_INDEX_CARDS = 14;
   SERVICE_INDEX_LINKS = 15;
+  SERVICE_INDEX_IMPORT_EXPORT = 16;
 }
 
 message BackendInit {


@@ -10,7 +10,6 @@ import "anki/generic.proto";
 service CollectionService {
   rpc OpenCollection(OpenCollectionRequest) returns (generic.Empty);
   rpc CloseCollection(CloseCollectionRequest) returns (generic.Empty);
-  rpc RestoreBackup(RestoreBackupRequest) returns (generic.String);
   rpc CheckDatabase(generic.Empty) returns (CheckDatabaseResponse);
   rpc GetUndoStatus(generic.Empty) returns (UndoStatus);
   rpc Undo(generic.Empty) returns (OpChangesAfterUndo);
@@ -20,7 +19,6 @@ service CollectionService {
   rpc LatestProgress(generic.Empty) returns (Progress);
   rpc SetWantsAbort(generic.Empty) returns (generic.Empty);
   rpc AwaitBackupCompletion(generic.Empty) returns (generic.Empty);
-  rpc ExportCollection(ExportCollectionRequest) returns (generic.Empty);
 }
 
 message OpenCollectionRequest {
@@ -39,12 +37,6 @@ message CloseCollectionRequest {
   optional uint64 minimum_backup_interval = 3;
 }
 
-message RestoreBackupRequest {
-  string col_path = 1;
-  string backup_path = 2;
-  string media_folder = 3;
-}
-
 message CheckDatabaseResponse {
   repeated string problems = 1;
 }
@@ -125,9 +117,3 @@ message Progress {
     uint32 exporting = 8;
   }
 }
-
-message ExportCollectionRequest {
-  string out_path = 1;
-  bool include_media = 2;
-  bool legacy = 3;
-}


@@ -0,0 +1,52 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
syntax = "proto3";
package anki.import_export;
import "anki/generic.proto";
service ImportExportService {
rpc ImportCollectionPackage(ImportCollectionPackageRequest)
returns (generic.Empty);
rpc ExportCollectionPackage(ExportCollectionPackageRequest)
returns (generic.Empty);
}
message ImportCollectionPackageRequest {
string col_path = 1;
string backup_path = 2;
string media_folder = 3;
}
message ExportCollectionPackageRequest {
string out_path = 1;
bool include_media = 2;
bool legacy = 3;
}
message PackageMetadata {
enum Version {
VERSION_UNKNOWN = 0;
// When `meta` missing, and collection.anki2 file present.
VERSION_LEGACY_1 = 1;
// When `meta` missing, and collection.anki21 file present.
VERSION_LEGACY_2 = 2;
/// Implies MediaEntry media map, and zstd compression.
/// collection.21b file
VERSION_LATEST = 3;
}
Version version = 1;
}
message MediaEntries {
message MediaEntry {
string name = 1;
uint32 size = 2;
bytes sha1 = 3;
}
repeated MediaEntry entries = 1;
}


@@ -25,6 +25,7 @@ import anki.stats_pb2
 import anki.card_rendering_pb2
 import anki.tags_pb2
 import anki.media_pb2
+import anki.import_export_pb2
 
 import stringcase
@@ -184,6 +185,7 @@ service_modules = dict(
     TAGS=anki.tags_pb2,
     MEDIA=anki.media_pb2,
     LINKS=anki.links_pb2,
+    IMPORT_EXPORT=anki.import_export_pb2,
 )
 
 for service in anki.backend_pb2.ServiceIndex.DESCRIPTOR.values:
@@ -236,6 +238,7 @@ import anki.stats_pb2
 import anki.card_rendering_pb2
 import anki.tags_pb2
 import anki.media_pb2
+import anki.import_export_pb2
 
 class RustBackendGenerated:
     def _run_command(self, service: int, method: int, input: Any) -> bytes:


@@ -268,7 +268,7 @@ class Collection(DeprecatedNamesMixin):
         self, out_path: str, include_media: bool, legacy: bool
     ) -> None:
         self.close_for_full_sync()
-        self._backend.export_collection(
+        self._backend.export_collection_package(
            out_path=out_path, include_media=include_media, legacy=legacy
         )


@@ -479,10 +479,10 @@ def replace_with_apkg(
         mw.taskman.run_on_main(lambda: mw.progress.update(label=label))
 
-    def do_import() -> str:
+    def do_import() -> None:
         col_path = mw.pm.collectionPath()
         media_folder = os.path.join(mw.pm.profileFolder(), "collection.media")
-        return mw.backend.restore_backup(
+        mw.backend.import_collection_package(
             col_path=col_path, backup_path=filename, media_folder=media_folder
         )
@@ -491,14 +491,12 @@ def replace_with_apkg(
         timer.deleteLater()
 
         try:
-            soft_error = future.result()
+            future.result()
         except Exception as error:
             if not isinstance(error, Interrupted):
                 showWarning(str(error))
             callback(False)
         else:
-            if soft_error:
-                showWarning(soft_error)
             callback(True)
 
     qconnect(timer.timeout, on_progress)


@@ -98,5 +98,5 @@ tokio-util = { version = "0.6.8", features = ["io"] }
 pct-str = { git="https://github.com/timothee-haudebourg/pct-str.git", rev="4adccd8d4a222ab2672350a102f06ae832a0572d" }
 unic-ucd-category = "0.9.0"
 id_tree = "1.8.0"
-zstd = { version="0.10.0+zstd.1.5.2", features=["zstdmt"] }
+zstd = { version="0.10.0", features=["zstdmt"] }
 num_cpus = "1.13.1"


@@ -10,11 +10,7 @@ pub(super) use crate::backend_proto::collection_service::Service as CollectionSe
 use crate::{
     backend::progress::progress_to_proto,
     backend_proto::{self as pb, preferences::Backups},
-    collection::{
-        backup::{self, ImportProgress},
-        exporting::export_collection_file,
-        CollectionBuilder,
-    },
+    collection::{backup, CollectionBuilder},
     log::{self},
     prelude::*,
 };
@@ -76,42 +72,6 @@ impl CollectionService for Backend {
         Ok(().into())
     }
 
-    fn export_collection(&self, input: pb::ExportCollectionRequest) -> Result<pb::Empty> {
-        self.abort_media_sync_and_wait();
-
-        let mut guard = self.lock_open_collection()?;
-
-        let col_inner = guard.take().unwrap();
-        let col_path = col_inner.col_path.clone();
-        let media_dir = input.include_media.then(|| col_inner.media_folder.clone());
-
-        col_inner.close(true)?;
-
-        export_collection_file(
-            input.out_path,
-            col_path,
-            media_dir,
-            input.legacy,
-            &self.tr,
-            self.export_progress_fn(),
-        )
-        .map(Into::into)
-    }
-
-    fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result<pb::String> {
-        let _guard = self.lock_closed_collection()?;
-
-        backup::restore_backup(
-            self.import_progress_fn(),
-            &input.col_path,
-            &input.backup_path,
-            &input.media_folder,
-            &self.tr,
-        )
-        .map(Into::into)
-    }
-
     fn check_database(&self, _input: pb::Empty) -> Result<pb::CheckDatabaseResponse> {
         let mut handler = self.new_progress_handler();
         let progress_fn = move |progress, throttle| {
@@ -154,7 +114,7 @@ impl CollectionService for Backend {
 }
 
 impl Backend {
-    fn lock_open_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
+    pub(super) fn lock_open_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
         let guard = self.col.lock().unwrap();
         guard
             .is_some()
@@ -162,7 +122,7 @@ impl Backend {
             .ok_or(AnkiError::CollectionNotOpen)
     }
 
-    fn lock_closed_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
+    pub(super) fn lock_closed_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
         let guard = self.col.lock().unwrap();
         guard
             .is_none()
@@ -195,23 +155,4 @@ impl Backend {
         Ok(())
     }
 
-    fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> {
-        let mut handler = self.new_progress_handler();
-        move |progress| {
-            let throttle = matches!(progress, ImportProgress::Media(_));
-            if handler.update(Progress::Import(progress), throttle) {
-                Ok(())
-            } else {
-                Err(AnkiError::Interrupted)
-            }
-        }
-    }
-
-    fn export_progress_fn(&self) -> impl FnMut(usize) {
-        let mut handler = self.new_progress_handler();
-        move |media_files| {
-            handler.update(Progress::Export(media_files), true);
-        }
-    }
 }


@@ -0,0 +1,67 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use super::{progress::Progress, Backend};
pub(super) use crate::backend_proto::importexport_service::Service as ImportExportService;
use crate::{
backend_proto::{self as pb},
import_export::{package::import_colpkg, ImportProgress},
prelude::*,
};
impl ImportExportService for Backend {
fn export_collection_package(
&self,
input: pb::ExportCollectionPackageRequest,
) -> Result<pb::Empty> {
self.abort_media_sync_and_wait();
let mut guard = self.lock_open_collection()?;
let col_inner = guard.take().unwrap();
col_inner
.export_colpkg(
input.out_path,
input.include_media,
input.legacy,
self.export_progress_fn(),
)
.map(Into::into)
}
fn import_collection_package(
&self,
input: pb::ImportCollectionPackageRequest,
) -> Result<pb::Empty> {
let _guard = self.lock_closed_collection()?;
import_colpkg(
&input.backup_path,
&input.col_path,
&input.media_folder,
self.import_progress_fn(),
)
.map(Into::into)
}
}
impl Backend {
fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> {
let mut handler = self.new_progress_handler();
move |progress| {
let throttle = matches!(progress, ImportProgress::Media(_));
if handler.update(Progress::Import(progress), throttle) {
Ok(())
} else {
Err(AnkiError::Interrupted)
}
}
}
fn export_progress_fn(&self) -> impl FnMut(usize) {
let mut handler = self.new_progress_handler();
move |media_files| {
handler.update(Progress::Export(media_files), true);
}
}
}


@@ -15,6 +15,7 @@ mod decks;
 mod error;
 mod generic;
 mod i18n;
+mod import_export;
 mod links;
 mod media;
 mod notes;
@@ -47,6 +48,7 @@ use self::{
     deckconfig::DeckConfigService,
     decks::DecksService,
     i18n::I18nService,
+    import_export::ImportExportService,
     links::LinksService,
     media::MediaService,
     notes::NotesService,
@@ -145,6 +147,9 @@ impl Backend {
             pb::ServiceIndex::Links => LinksService::run_method(self, method, input),
             pb::ServiceIndex::Collection => CollectionService::run_method(self, method, input),
             pb::ServiceIndex::Cards => CardsService::run_method(self, method, input),
+            pb::ServiceIndex::ImportExport => {
+                ImportExportService::run_method(self, method, input)
+            }
         })
         .map_err(|err| {
             let backend_err = err.into_protobuf(&self.tr);


@@ -8,9 +8,9 @@ use futures::future::AbortHandle;
 use super::Backend;
 use crate::{
     backend_proto as pb,
-    collection::backup::ImportProgress,
     dbcheck::DatabaseCheckProgress,
     i18n::I18n,
+    import_export::ImportProgress,
     media::sync::MediaSyncProgress,
     sync::{FullSyncProgress, NormalSyncProgress, SyncStage},
 };


@@ -22,6 +22,7 @@ protobuf!(deckconfig);
 protobuf!(decks);
 protobuf!(generic);
 protobuf!(i18n);
+protobuf!(import_export);
 protobuf!(links);
 protobuf!(media);
 protobuf!(notes);


@@ -2,10 +2,8 @@
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
 use std::{
-    collections::HashMap,
     ffi::OsStr,
-    fs::{self, read_dir, remove_file, DirEntry, File},
-    io::{self, Write},
+    fs::{read_dir, remove_file, DirEntry},
     path::{Path, PathBuf},
     thread::{self, JoinHandle},
     time::SystemTime,
@@ -14,32 +12,16 @@ use std::{
 use chrono::prelude::*;
 use itertools::Itertools;
 use log::error;
-use tempfile::NamedTempFile;
-use zip::ZipArchive;
-use zstd::{self, stream::copy_decode};
 
 use crate::{
-    backend_proto::preferences::Backups,
-    collection::{
-        exporting::{export_collection_data, Meta, PACKAGE_VERSION},
-        CollectionBuilder,
-    },
-    error::ImportError,
-    log,
+    backend_proto::preferences::Backups, import_export::package::export_colpkg_from_data, log,
     prelude::*,
-    text::normalize_to_nfc,
 };
 
 const BACKUP_FORMAT_STRING: &str = "backup-%Y-%m-%d-%H.%M.%S.colpkg";
 /// Default seconds after a backup, in which further backups will be skipped.
 const MINIMUM_BACKUP_INTERVAL: u64 = 5 * 60;
 
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum ImportProgress {
-    Collection,
-    Media(usize),
-}
-
 pub fn backup(
     col_path: impl AsRef<Path>,
     backup_folder: impl AsRef<Path> + Send + 'static,
@@ -69,45 +51,6 @@ fn has_recent_backup(backup_folder: &Path, recent_secs: u64) -> Result<bool> {
         .any(|duration| duration.as_secs() < recent_secs))
 }
 
-pub fn restore_backup(
-    mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
-    col_path: &str,
-    backup_path: &str,
-    media_folder: &str,
-    tr: &I18n,
-) -> Result<String> {
-    progress_fn(ImportProgress::Collection)?;
-    let col_path = PathBuf::from(col_path);
-    let col_dir = col_path
-        .parent()
-        .ok_or_else(|| AnkiError::invalid_input("bad collection path"))?;
-    let mut tempfile = NamedTempFile::new_in(col_dir)?;
-
-    let backup_file = File::open(backup_path)?;
-    let mut archive = ZipArchive::new(backup_file)?;
-    let meta = Meta::from_archive(&mut archive)?;
-
-    copy_collection(&mut archive, &mut tempfile, meta)?;
-    progress_fn(ImportProgress::Collection)?;
-    check_collection(tempfile.path())?;
-    progress_fn(ImportProgress::Collection)?;
-
-    let mut result = String::new();
-    if let Err(e) = restore_media(meta, progress_fn, &mut archive, media_folder) {
-        result = tr
-            .importing_failed_to_import_media_file(e.localized_description(tr))
-            .into_owned()
-    };
-
-    tempfile.as_file().sync_all()?;
-    tempfile.persist(&col_path).map_err(|err| err.error)?;
-    if !cfg!(windows) {
-        File::open(col_dir)?.sync_all()?;
-    }
-
-    Ok(result)
-}
-
 fn backup_inner<P: AsRef<Path>>(
     col_data: &[u8],
     backup_folder: P,
@@ -126,7 +69,7 @@ fn backup_inner<P: AsRef<Path>>(
 fn write_backup<S: AsRef<OsStr>>(col_data: &[u8], backup_folder: S, tr: &I18n) -> Result<()> {
     let out_path =
         Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING)));
-    export_collection_data(&out_path, col_data, tr)
+    export_colpkg_from_data(&out_path, col_data, tr)
 }
 
 fn thin_backups<P: AsRef<Path>>(backup_folder: P, limits: Backups, log: &Logger) -> Result<()> {
@@ -281,113 +224,6 @@ impl BackupFilter {
     }
 }
 
-impl Meta {
-    /// Extracts meta data from an archive and checks if its version is supported.
-    fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
-        let mut meta: Self = archive
-            .by_name("meta")
-            .ok()
-            .and_then(|file| serde_json::from_reader(file).ok())
-            .unwrap_or_default();
-        if meta.version > PACKAGE_VERSION {
-            return Err(AnkiError::ImportError(ImportError::TooNew));
-        } else if meta.version == 0 {
-            meta.version = if archive.by_name("collection.anki21").is_ok() {
-                2
-            } else {
-                1
-            };
-        }
-
-        Ok(meta)
-    }
-}
-
-fn check_collection(col_path: &Path) -> Result<()> {
-    CollectionBuilder::new(col_path)
-        .build()
-        .ok()
-        .and_then(|col| {
-            col.storage
-                .db
-                .pragma_query_value(None, "integrity_check", |row| row.get::<_, String>(0))
-                .ok()
-        })
-        .and_then(|s| (s == "ok").then(|| ()))
-        .ok_or(AnkiError::ImportError(ImportError::Corrupt))
-}
-
-fn restore_media(
-    meta: Meta,
-    mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
-    archive: &mut ZipArchive<File>,
-    media_folder: &str,
-) -> Result<()> {
-    let media_file_names = extract_media_file_names(meta, archive)?;
-    let mut count = 0;
-
-    for (archive_file_name, file_name) in media_file_names.iter().enumerate() {
-        count += 1;
-        if count % 10 == 0 {
-            progress_fn(ImportProgress::Media(count))?;
-        }
-
-        if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
-            let file_path = Path::new(&media_folder).join(normalize_to_nfc(file_name).as_ref());
-            let files_are_equal = fs::metadata(&file_path)
-                .map(|metadata| metadata.len() == zip_file.size())
-                .unwrap_or_default();
-            if !files_are_equal {
-                let mut file = match File::create(&file_path) {
-                    Ok(file) => file,
-                    Err(err) => return Err(AnkiError::file_io_error(err, &file_path)),
-                };
-                if let Err(err) = io::copy(&mut zip_file, &mut file) {
-                    return Err(AnkiError::file_io_error(err, &file_path));
-                }
-            }
-        } else {
-            return Err(AnkiError::invalid_input(&format!(
-                "{archive_file_name} missing from archive"
-            )));
-        }
-    }
-
-    Ok(())
-}
-
-fn extract_media_file_names(meta: Meta, archive: &mut ZipArchive<File>) -> Result<Vec<String>> {
-    let mut file = archive.by_name("media")?;
-    let mut buf = Vec::new();
-    if meta.zstd_compressed() {
-        copy_decode(file, &mut buf)?;
-    } else {
-        io::copy(&mut file, &mut buf)?;
-    }
-    if meta.media_list_is_hashmap() {
-        let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
-        Ok(map.into_iter().map(|(_k, v)| v).collect())
-    } else {
-        serde_json::from_slice(&buf).map_err(Into::into)
-    }
-}
-
-fn copy_collection(
-    archive: &mut ZipArchive<File>,
-    writer: &mut impl Write,
-    meta: Meta,
-) -> Result<()> {
-    let mut file = archive
-        .by_name(meta.collection_name())
-        .map_err(|_| AnkiError::ImportError(ImportError::Corrupt))?;
-    if !meta.zstd_compressed() {
-        io::copy(&mut file, writer)?;
-    } else {
-        copy_decode(file, writer)?;
-    }
-    Ok(())
-}
-
 #[cfg(test)]
 mod test {
     use super::*;


@@ -2,7 +2,6 @@
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
 pub mod backup;
-pub mod exporting;
 pub(crate) mod timestamps;
 mod transact;
 pub(crate) mod undo;


@@ -183,17 +183,19 @@ pub enum TemplateSaveErrorDetails {
     ExtraneousCloze,
 }
 
-#[derive(Debug, PartialEq, Clone, Copy)]
+#[derive(Debug, PartialEq, Clone)]
 pub enum ImportError {
     Corrupt,
     TooNew,
+    MediaImportFailed(String),
 }
 
 impl ImportError {
-    fn localized_description(self, tr: &I18n) -> String {
+    fn localized_description(&self, tr: &I18n) -> String {
         match self {
-            Self::Corrupt => tr.importing_the_provided_file_is_not_a(),
-            Self::TooNew => tr.errors_collection_too_new(),
+            ImportError::Corrupt => tr.importing_the_provided_file_is_not_a(),
+            ImportError::TooNew => tr.errors_collection_too_new(),
+            ImportError::MediaImportFailed(err) => tr.importing_failed_to_import_media_file(err),
         }
         .into()
     }


@@ -0,0 +1,10 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
pub mod package;
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ImportProgress {
Collection,
Media(usize),
}


@@ -8,7 +8,7 @@ use std::{
     path::{Path, PathBuf},
 };
 
-use serde_derive::{Deserialize, Serialize};
+use prost::Message;
 use tempfile::NamedTempFile;
 use zip::{write::FileOptions, CompressionMethod, ZipWriter};
 use zstd::{
@@ -16,54 +16,49 @@ use zstd::{
     Encoder,
 };
 
-use crate::{collection::CollectionBuilder, prelude::*, text::normalize_to_nfc};
-
-/// Bump if making changes that break restoring on older releases.
-pub const PACKAGE_VERSION: u8 = 3;
-const COLLECTION_NAME: &str = "collection.anki21b";
-const COLLECTION_NAME_V1: &str = "collection.anki2";
-const COLLECTION_NAME_V2: &str = "collection.anki21";
+use super::super::{MediaEntries, MediaEntry, Meta, Version};
+use crate::{
+    collection::CollectionBuilder, media::files::sha1_of_data, prelude::*, text::normalize_to_nfc,
+};
 
 /// Enable multithreaded compression if over this size. For smaller files,
 /// multithreading makes things slower, and in initial tests, the crossover
 /// point was somewhere between 1MB and 10MB on a many-core system.
 const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024;
 
-#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)]
-#[serde(default)]
-pub(super) struct Meta {
-    #[serde(rename = "ver")]
-    pub(super) version: u8,
-}
-
-impl Meta {
-    pub(super) fn new() -> Self {
-        Self {
-            version: PACKAGE_VERSION,
-        }
-    }
-
-    pub(super) fn new_v2() -> Self {
-        Self { version: 2 }
-    }
-
-    pub(super) fn collection_name(&self) -> &'static str {
-        match self.version {
-            1 => COLLECTION_NAME_V1,
-            2 => COLLECTION_NAME_V2,
-            _ => COLLECTION_NAME,
-        }
-    }
-
-    pub(super) fn zstd_compressed(&self) -> bool {
-        self.version >= 3
-    }
-
-    pub(super) fn media_list_is_hashmap(&self) -> bool {
-        self.version < 3
-    }
-}
-
-pub fn export_collection_file(
+impl Collection {
+    pub fn export_colpkg(
+        self,
+        out_path: impl AsRef<Path>,
+        include_media: bool,
+        legacy: bool,
+        progress_fn: impl FnMut(usize),
+    ) -> Result<()> {
+        let colpkg_name = out_path.as_ref();
+        let src_path = self.col_path.clone();
+        let src_media_folder = if include_media {
+            Some(self.media_folder.clone())
+        } else {
+            None
+        };
+        let tr = self.tr.clone();
+        // FIXME: downgrade on v3 export is superfluous at current schema version. We don't
+        // want things to break when the schema is bumped in the future, so perhaps the
+        // exporting code should be downgrading to 18 instead of 11 (which will probably require
+        // changing the boolean to an enum).
+        self.close(true)?;
+        export_collection_file(
+            &colpkg_name,
+            &src_path,
+            src_media_folder,
+            legacy,
+            &tr,
+            progress_fn,
+        )
+    }
+}
+
+fn export_collection_file(
     out_path: impl AsRef<Path>,
     col_path: impl AsRef<Path>,
     media_dir: Option<PathBuf>,
@@ -71,7 +66,11 @@ pub fn export_collection_file(
     tr: &I18n,
     progress_fn: impl FnMut(usize),
 ) -> Result<()> {
-    let meta = if legacy { Meta::new_v2() } else { Meta::new() };
+    let meta = if legacy {
+        Meta::new_legacy()
+    } else {
+        Meta::new()
+    };
     let mut col_file = File::open(col_path)?;
     let col_size = col_file.metadata()?.len() as usize;
     export_collection(
@@ -85,7 +84,8 @@ pub fn export_collection_file(
     )
 }
 
-pub(crate) fn export_collection_data(
+/// Write copied collection data without any media.
+pub(crate) fn export_colpkg_from_data(
     out_path: impl AsRef<Path>,
     mut col_data: &[u8],
     tr: &I18n,
@@ -115,10 +115,12 @@ fn export_collection(
     let mut zip = ZipWriter::new(out_file);
 
     zip.start_file("meta", file_options_stored())?;
-    zip.write_all(serde_json::to_string(&meta).unwrap().as_bytes())?;
-    write_collection(meta, &mut zip, col, col_size)?;
+    let mut meta_bytes = vec![];
+    meta.encode(&mut meta_bytes)?;
+    zip.write_all(&meta_bytes)?;
+    write_collection(&meta, &mut zip, col, col_size)?;
     write_dummy_collection(&mut zip, tr)?;
-    write_media(meta, &mut zip, media_dir, progress_fn)?;
+    write_media(&meta, &mut zip, media_dir, progress_fn)?;
     zip.finish()?;
 
     Ok(())
@@ -129,16 +131,16 @@ fn file_options_stored() -> FileOptions {
 }
 
 fn write_collection(
-    meta: Meta,
+    meta: &Meta,
     zip: &mut ZipWriter<File>,
     col: &mut impl Read,
     size: usize,
 ) -> Result<()> {
     if meta.zstd_compressed() {
-        zip.start_file(meta.collection_name(), file_options_stored())?;
+        zip.start_file(meta.collection_filename(), file_options_stored())?;
         zstd_copy(col, zip, size)?;
     } else {
-        zip.start_file(meta.collection_name(), FileOptions::default())?;
+        zip.start_file(meta.collection_filename(), FileOptions::default())?;
         io::copy(col, zip)?;
     }
     Ok(())
@@ -146,7 +148,10 @@ fn write_collection(
 
 fn write_dummy_collection(zip: &mut ZipWriter<File>, tr: &I18n) -> Result<()> {
     let mut tempfile = create_dummy_collection_file(tr)?;
-    zip.start_file(COLLECTION_NAME_V1, file_options_stored())?;
+    zip.start_file(
+        Version::Legacy1.collection_filename(),
+        file_options_stored(),
+    )?;
     io::copy(&mut tempfile, zip)?;
 
     Ok(())
@@ -187,36 +192,45 @@ fn zstd_copy(reader: &mut impl Read, writer: &mut impl Write, size: usize) -> Re
 }
 
 fn write_media(
-    meta: Meta,
+    meta: &Meta,
     zip: &mut ZipWriter<File>,
     media_dir: Option<PathBuf>,
     progress_fn: impl FnMut(usize),
 ) -> Result<()> {
-    let mut media_names = vec![];
+    let mut media_entries = vec![];
 
     if let Some(media_dir) = media_dir {
-        write_media_files(meta, zip, &media_dir, &mut media_names, progress_fn)?;
+        write_media_files(meta, zip, &media_dir, &mut media_entries, progress_fn)?;
     }
 
-    write_media_map(meta, &media_names, zip)?;
+    write_media_map(meta, media_entries, zip)?;
 
     Ok(())
 }
 
-fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter<File>) -> Result<()> {
+fn write_media_map(
+    meta: &Meta,
+    media_entries: Vec<MediaEntry>,
+    zip: &mut ZipWriter<File>,
+) -> Result<()> {
     zip.start_file("media", file_options_stored())?;
-    let json_bytes = if meta.media_list_is_hashmap() {
-        let map: HashMap<String, &str> = media_names
+    let encoded_bytes = if meta.media_list_is_hashmap() {
+        let map: HashMap<String, &str> = media_entries
             .iter()
             .enumerate()
-            .map(|(k, v)| (k.to_string(), v.as_str()))
+            .map(|(k, entry)| (k.to_string(), entry.name.as_str()))
             .collect();
         serde_json::to_vec(&map)?
     } else {
-        serde_json::to_vec(media_names)?
+        let mut buf = vec![];
+        MediaEntries {
+            entries: media_entries,
+        }
+        .encode(&mut buf)?;
+        buf
     };
-    let size = json_bytes.len();
-    let mut cursor = std::io::Cursor::new(json_bytes);
+    let size = encoded_bytes.len();
+    let mut cursor = std::io::Cursor::new(encoded_bytes);
     if meta.zstd_compressed() {
         zstd_copy(&mut cursor, zip, size)?;
     } else {
@@ -226,10 +240,10 @@ fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter<File>
 }
 
 fn write_media_files(
-    meta: Meta,
+    meta: &Meta,
     zip: &mut ZipWriter<File>,
     dir: &Path,
-    names: &mut Vec<String>,
+    media_entries: &mut Vec<MediaEntry>,
     mut progress_fn: impl FnMut(usize),
 ) -> Result<()> {
     let mut writer = MediaFileWriter::new(meta);
@@ -240,9 +254,15 @@ fn write_media_files(
             continue;
         }
 
         progress_fn(index);
-        names.push(normalized_unicode_file_name(&entry)?);
         zip.start_file(index.to_string(), file_options_stored())?;
-        writer = writer.write(&mut File::open(entry.path())?, zip)?;
+
+        let name = normalized_unicode_file_name(&entry)?;
+        // FIXME: we should chunk this
+        let data = std::fs::read(entry.path())?;
+        let media_entry = make_media_entry(&data, name);
+        writer = writer.write(&mut std::io::Cursor::new(data), zip)?;
+        media_entries.push(media_entry);
 
         // can't enumerate(), as we skip folders
         index += 1;
@@ -250,6 +270,14 @@ fn write_media_files(
     Ok(())
 }
 
+fn make_media_entry(data: &[u8], name: String) -> MediaEntry {
+    MediaEntry {
+        name,
+        size: data.len() as u32,
+        sha1: sha1_of_data(data).to_vec(),
+    }
+}
+
 fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
     entry
         .file_name()
@@ -268,7 +296,7 @@ fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
 
 struct MediaFileWriter(Option<RawEncoder<'static>>);
 
 impl MediaFileWriter {
-    fn new(meta: Meta) -> Self {
+    fn new(meta: &Meta) -> Self {
         Self(
             meta.zstd_compressed()
                 .then(|| RawEncoder::with_dictionary(0, &[]).unwrap()),


@@ -0,0 +1,203 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
collections::HashMap,
fs::{self, File},
io::{self, Read, Write},
path::{Path, PathBuf},
};
use prost::Message;
use tempfile::NamedTempFile;
use zip::ZipArchive;
use zstd::{self, stream::copy_decode};
use super::super::Version;
use crate::{
collection::CollectionBuilder,
error::ImportError,
import_export::{
package::{MediaEntries, MediaEntry, Meta},
ImportProgress,
},
prelude::*,
text::normalize_to_nfc,
};
impl Meta {
/// Extracts meta data from an archive and checks if its version is supported.
pub(super) fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| {
let mut buf = vec![];
meta_file.read_to_end(&mut buf).ok()?;
Some(buf)
});
let meta = if let Some(bytes) = meta_bytes {
let meta: Meta = Message::decode(&*bytes)?;
if meta.version() == Version::Unknown {
return Err(AnkiError::ImportError(ImportError::TooNew));
}
meta
} else {
Meta {
version: if archive.by_name("collection.anki21").is_ok() {
Version::Legacy2
} else {
Version::Legacy1
} as i32,
}
};
Ok(meta)
}
}
pub fn import_colpkg(
colpkg_path: &str,
target_col_path: &str,
target_media_folder: &str,
mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
) -> Result<()> {
progress_fn(ImportProgress::Collection)?;
let col_path = PathBuf::from(target_col_path);
let col_dir = col_path
.parent()
.ok_or_else(|| AnkiError::invalid_input("bad collection path"))?;
let mut tempfile = NamedTempFile::new_in(col_dir)?;
let backup_file = File::open(colpkg_path)?;
let mut archive = ZipArchive::new(backup_file)?;
let meta = Meta::from_archive(&mut archive)?;
copy_collection(&mut archive, &mut tempfile, &meta)?;
progress_fn(ImportProgress::Collection)?;
check_collection(tempfile.path())?;
progress_fn(ImportProgress::Collection)?;
let media_import_result = restore_media(&meta, progress_fn, &mut archive, target_media_folder);
// Proceed with replacing collection, regardless of media import result
tempfile.as_file().sync_all()?;
tempfile.persist(&col_path).map_err(|err| err.error)?;
if !cfg!(windows) {
File::open(col_dir)?.sync_all()?;
}
media_import_result
}
fn check_collection(col_path: &Path) -> Result<()> {
CollectionBuilder::new(col_path)
.build()
.ok()
.and_then(|col| {
col.storage
.db
.pragma_query_value(None, "integrity_check", |row| row.get::<_, String>(0))
.ok()
})
.and_then(|s| (s == "ok").then(|| ()))
.ok_or(AnkiError::ImportError(ImportError::Corrupt))
}
fn restore_media(
meta: &Meta,
mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
archive: &mut ZipArchive<File>,
media_folder: &str,
) -> Result<()> {
let media_entries = extract_media_entries(meta, archive)?;
std::fs::create_dir_all(media_folder)?;
let mut count = 0;
for (archive_file_name, entry) in media_entries.iter().enumerate() {
count += 1;
if count % 10 == 0 {
progress_fn(ImportProgress::Media(count))?;
}
if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
let file_path = Path::new(&media_folder).join(normalize_to_nfc(&entry.name).as_ref());
let size_in_colpkg = if meta.media_list_is_hashmap() {
zip_file.size()
} else {
entry.size as u64
};
let files_are_equal = fs::metadata(&file_path)
.map(|metadata| metadata.len() == size_in_colpkg)
.unwrap_or_default();
if !files_are_equal {
// FIXME: write to temp file and atomic rename
let mut file = match File::create(&file_path) {
Ok(file) => file,
Err(err) => return Err(AnkiError::file_io_error(err, &file_path)),
};
if meta.zstd_compressed() {
copy_decode(&mut zip_file, &mut file)
} else {
io::copy(&mut zip_file, &mut file).map(|_| ())
}
.map_err(|err| AnkiError::file_io_error(err, &file_path))?;
}
} else {
return Err(AnkiError::invalid_input(&format!(
"{archive_file_name} missing from archive"
)));
}
}
Ok(())
}
fn extract_media_entries(meta: &Meta, archive: &mut ZipArchive<File>) -> Result<Vec<MediaEntry>> {
let mut file = archive.by_name("media")?;
let mut buf = Vec::new();
if meta.zstd_compressed() {
copy_decode(file, &mut buf)?;
} else {
io::copy(&mut file, &mut buf)?;
}
if meta.media_list_is_hashmap() {
let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
let mut entries: Vec<(usize, String)> = map
.into_iter()
.map(|(k, v)| (k.parse().unwrap_or_default(), v))
.collect();
entries.sort_unstable();
// any gaps in the file numbers would lead to media being imported under the wrong name
if entries
.iter()
.enumerate()
.any(|(idx1, (idx2, _))| idx1 != *idx2)
{
return Err(AnkiError::ImportError(ImportError::Corrupt));
}
Ok(entries
.into_iter()
.map(|(_str_idx, name)| MediaEntry {
name,
size: 0,
sha1: vec![],
})
.collect())
} else {
let entries: MediaEntries = Message::decode(&*buf)?;
Ok(entries.entries)
}
}
fn copy_collection(
archive: &mut ZipArchive<File>,
writer: &mut impl Write,
meta: &Meta,
) -> Result<()> {
let mut file = archive
.by_name(meta.collection_filename())
.map_err(|_| AnkiError::ImportError(ImportError::Corrupt))?;
if !meta.zstd_compressed() {
io::copy(&mut file, writer)?;
} else {
copy_decode(file, writer)?;
}
Ok(())
}


@@ -0,0 +1,6 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
pub(super) mod export;
pub(super) mod import;
mod tests;


@@ -0,0 +1,70 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
#![cfg(test)]
use std::path::Path;
use tempfile::tempdir;
use crate::{
collection::CollectionBuilder, import_export::package::import_colpkg, media::MediaManager,
prelude::*,
};
fn collection_with_media(dir: &Path, name: &str) -> Result<Collection> {
let name = format!("{name}_src");
let media_folder = dir.join(format!("{name}.media"));
std::fs::create_dir(&media_folder)?;
// add collection with sentinel note
let mut col = CollectionBuilder::new(dir.join(format!("{name}.anki2")))
.set_media_paths(media_folder, dir.join(format!("{name}.mdb")))
.build()?;
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
col.add_note(&mut note, DeckId(1))?;
// add sample media
let mgr = MediaManager::new(&col.media_folder, &col.media_db)?;
let mut ctx = mgr.dbctx();
mgr.add_file(&mut ctx, "1", b"1")?;
mgr.add_file(&mut ctx, "2", b"2")?;
mgr.add_file(&mut ctx, "3", b"3")?;
Ok(col)
}
#[test]
fn roundtrip() -> Result<()> {
let _dir = tempdir()?;
let dir = _dir.path();
for (legacy, name) in [(true, "legacy"), (false, "v3")] {
// export to a file
let col = collection_with_media(dir, name)?;
let colpkg_name = dir.join(format!("{name}.colpkg"));
col.export_colpkg(&colpkg_name, true, legacy, |_| ())?;
// import into a new collection
let anki2_name = dir
.join(format!("{name}.anki2"))
.to_string_lossy()
.into_owned();
let import_media_dir = dir.join(format!("{name}.media"));
import_colpkg(
&colpkg_name.to_string_lossy(),
&anki2_name,
import_media_dir.to_str().unwrap(),
|_| Ok(()),
)?;
// confirm collection imported
let col = CollectionBuilder::new(&anki2_name).build()?;
assert_eq!(
col.storage.db_scalar::<i32>("select count() from notes")?,
1
);
// confirm media imported correctly
assert_eq!(std::fs::read(import_media_dir.join("1"))?, b"1");
assert_eq!(std::fs::read(import_media_dir.join("2"))?, b"2");
assert_eq!(std::fs::read(import_media_dir.join("3"))?, b"3");
}
Ok(())
}


@@ -0,0 +1,45 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta};
impl Version {
pub(super) fn collection_filename(&self) -> &'static str {
match self {
Version::Unknown => unreachable!(),
Version::Legacy1 => "collection.anki2",
Version::Legacy2 => "collection.anki21",
Version::Latest => "collection.anki21b",
}
}
}
impl Meta {
pub(super) fn new() -> Self {
Self {
version: Version::Latest as i32,
}
}
pub(super) fn new_legacy() -> Self {
Self {
version: Version::Legacy2 as i32,
}
}
pub(super) fn collection_filename(&self) -> &'static str {
self.version().collection_filename()
}
pub(super) fn zstd_compressed(&self) -> bool {
!self.is_legacy()
}
pub(super) fn media_list_is_hashmap(&self) -> bool {
self.is_legacy()
}
fn is_legacy(&self) -> bool {
matches!(self.version(), Version::Legacy1 | Version::Legacy2)
}
}


@@ -0,0 +1,11 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
mod colpkg;
mod meta;
pub(crate) use colpkg::export::export_colpkg_from_data;
pub use colpkg::import::import_colpkg;
pub(self) use meta::{Meta, Version};
pub(self) use crate::backend_proto::{media_entries::MediaEntry, MediaEntries};


@@ -18,6 +18,7 @@ pub mod decks;
 pub mod error;
 pub mod findreplace;
 pub mod i18n;
+pub mod import_export;
 pub mod latex;
 pub mod links;
 pub mod log;


@@ -590,6 +590,7 @@ mod test {
         }};
     }
 
+    // FIXME: This fails between 3:50-4:00 GMT
     #[test]
     fn new_limited_by_reviews() -> Result<()> {
         let (mut col, cids) = v3_test_collection(4)?;