Backend colpkg exporting (#1719)

* Implement colpkg exporting on backend

* Use exporting logic in backup.rs

* Refactor exporting.rs

* Add backend function to export collection

* Refactor backend/collection.rs

* Use backend for colpkg exporting

* Don't use default zip compression for media

* Add exporting progress

* Refactor media file writing

* Write dummy collections

* Localize dummy collection note

* Minimize dummy db size

* Use `NamedTempFile::new()` instead of `new_in`

* Drop redundant v2 dummy collection

* COLLECTION_VERSION -> PACKAGE_VERSION

* Split `lock_collection()` into two to drop flag

* Expose new colpkg in GUI

* Improve dummy collection message

* Please the type checker

* importing-colpkg-too-new -> exporting-...

* Compress the media map in the v3 package (dae)

On collections with lots of media, it can grow into megabytes.

Also return an error in extract_media_file_names(), instead of masking
it as an optional.
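
As a rough sketch of the idea (not the code in this commit; the helper name and the exact `zip`/`zstd` calls are illustrative), the JSON media listing can be zstd-compressed before being stored as the zip's `media` entry, which keeps it small even on media-heavy collections:

```rust
use std::io::{Seek, Write};

use zip::{write::FileOptions, CompressionMethod, ZipWriter};

/// Hypothetical helper: serialize the media listing, compress it with zstd,
/// and store the result without additional zip-level compression.
fn write_compressed_media_entry<W: Write + Seek>(
    zip: &mut ZipWriter<W>,
    media_names: &[String],
) -> Result<(), Box<dyn std::error::Error>> {
    let json = serde_json::to_vec(media_names)?;
    // Level 0 selects zstd's default compression level.
    let compressed = zstd::encode_all(&json[..], 0)?;
    zip.start_file(
        "media",
        FileOptions::default().compression_method(CompressionMethod::Stored),
    )?;
    zip.write_all(&compressed)?;
    Ok(())
}
```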

* Store media map as a vector in the v3 package (dae)

This compresses better (e.g. 280 KB original, 100 KB as a hashmap, 42 KB as a vec).

In the colpkg import case we don't need random access. When importing
an apkg, we will need to be able to fetch file data for a given media
filename, but the existing map doesn't help us there, as we need
filename->index, not index->filename.
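
For illustration only (file names invented), the two on-disk shapes of the `media` entry look roughly like this; the vec drops the repeated numeric keys, which is where most of the size win comes from:

```rust
use std::collections::HashMap;

fn main() -> serde_json::Result<()> {
    let names = vec!["a.jpg".to_string(), "b.mp3".to_string()];

    // Legacy layout (version <= 2): {"0":"a.jpg","1":"b.mp3"},
    // i.e. zip entry index -> original filename.
    let legacy: HashMap<String, &str> = names
        .iter()
        .enumerate()
        .map(|(i, name)| (i.to_string(), name.as_str()))
        .collect();
    println!("{}", serde_json::to_string(&legacy)?);

    // v3 layout: ["a.jpg","b.mp3"]; the position in the vec doubles as the
    // zip entry name, so no keys need to be stored at all.
    println!("{}", serde_json::to_string(&names)?);
    Ok(())
}
```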

* Ensure folders in the media dir don't break the file mapping (dae)
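
A minimal sketch of that last point (helper name assumed, not taken from this diff): when subfolders are skipped, the entry index has to be tracked manually rather than via `enumerate()`, so that zip entry `N` still lines up with `names[N]`:

```rust
use std::{fs::read_dir, io, path::Path};

// Collect media file names, skipping anything that isn't a plain file.
// The manual index keeps zip entry names in step with the names vec.
fn collect_media_names(dir: &Path) -> io::Result<Vec<(usize, String)>> {
    let mut names = Vec::new();
    let mut index = 0;
    for entry in read_dir(dir)? {
        let entry = entry?;
        if !entry.metadata()?.is_file() {
            // A subfolder must not consume an index, or every later entry
            // in the zip would map to the wrong filename on import.
            continue;
        }
        names.push((index, entry.file_name().to_string_lossy().into_owned()));
        index += 1;
    }
    Ok(names)
}
```
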
RumovZ 2022-03-15 07:48:02 +01:00 committed by GitHub
parent d7a101827a
commit e759885734
9 changed files with 463 additions and 130 deletions


@@ -5,6 +5,7 @@ exporting-anki-deck-package = Anki Deck Package
 exporting-cards-in-plain-text = Cards in Plain Text
 exporting-collection = collection
 exporting-collection-exported = Collection exported.
+exporting-colpkg-too-new = Please update to the latest Anki version, then import the .colpkg file again.
 exporting-couldnt-save-file = Couldn't save file: { $val }
 exporting-export = Export...
 exporting-export-format = <b>Export format</b>:


@@ -20,6 +20,7 @@ service CollectionService {
   rpc LatestProgress(generic.Empty) returns (Progress);
   rpc SetWantsAbort(generic.Empty) returns (generic.Empty);
   rpc AwaitBackupCompletion(generic.Empty) returns (generic.Empty);
+  rpc ExportCollection(ExportCollectionRequest) returns (generic.Empty);
 }
 
 message OpenCollectionRequest {
@@ -121,5 +122,12 @@ message Progress {
     NormalSync normal_sync = 5;
     DatabaseCheck database_check = 6;
     string importing = 7;
+    uint32 exporting = 8;
   }
 }
+
+message ExportCollectionRequest {
+  string out_path = 1;
+  bool include_media = 2;
+  bool legacy = 3;
+}


@@ -264,6 +264,14 @@ class Collection(DeprecatedNamesMixin):
         self._clear_caches()
         self.db = None
 
+    def export_collection(
+        self, out_path: str, include_media: bool, legacy: bool
+    ) -> None:
+        self.close_for_full_sync()
+        self._backend.export_collection(
+            out_path=out_path, include_media=include_media, legacy=legacy
+        )
+
     def rollback(self) -> None:
         self._clear_caches()
         self.db.rollback()


@@ -9,6 +9,8 @@ import json
 import os
 import re
 import shutil
+import threading
+import time
 import unicodedata
 import zipfile
 from io import BufferedWriter
@@ -419,6 +421,7 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter):
     ext = ".colpkg"
     verbatim = True
     includeSched = None
+    LEGACY = True
 
     def __init__(self, col):
         AnkiPackageExporter.__init__(self, col)
@@ -427,22 +430,32 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter):
     def key(col: Collection) -> str:
         return col.tr.exporting_anki_collection_package()
 
-    def doExport(self, z, path):
-        "Export collection. Caller must re-open afterwards."
-        # close our deck & write it into the zip file
-        self.count = self.col.card_count()
-        v2 = self.col.sched_ver() != 1
-        mdir = self.col.media.dir()
-        self.col.close(downgrade=True)
-        if not v2:
-            z.write(self.col.path, "collection.anki2")
-        else:
-            self._addDummyCollection(z)
-            z.write(self.col.path, "collection.anki21")
-        # copy all media
-        if not self.includeMedia:
-            return {}
-        return self._exportMedia(z, os.listdir(mdir), mdir)
+    def exportInto(self, path: str) -> None:
+        """Export collection. Caller must re-open afterwards."""
+
+        def exporting_media() -> bool:
+            return any(
+                hook.__name__ == "exported_media"
+                for hook in hooks.media_files_did_export._hooks
+            )
+
+        def progress() -> None:
+            while exporting_media():
+                progress = self.col._backend.latest_progress()
+                if progress.HasField("exporting"):
+                    hooks.media_files_did_export(progress.exporting)
+                time.sleep(0.1)
+
+        threading.Thread(target=progress).start()
+
+        self.col.export_collection(path, self.includeMedia, self.LEGACY)
+
+
+class AnkiCollectionPackage21bExporter(AnkiCollectionPackageExporter):
+    LEGACY = False
+
+    @staticmethod
+    def key(_col: Collection) -> str:
+        return "Anki 2.1.50+ Collection Package"
 
 
 # Export modules
@@ -459,6 +472,7 @@ def exporters(col: Collection) -> list[tuple[str, Any]]:
     exps = [
         id(AnkiCollectionPackageExporter),
+        id(AnkiCollectionPackage21bExporter),
         id(AnkiPackageExporter),
         id(TextNoteExporter),
         id(TextCardExporter),


@@ -1,7 +1,7 @@
 // Copyright: Ankitects Pty Ltd and contributors
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
-use std::path::Path;
+use std::{path::Path, sync::MutexGuard};
 
 use slog::error;
 
@@ -12,6 +12,7 @@ use crate::{
     backend_proto::{self as pb, preferences::Backups},
     collection::{
         backup::{self, ImportProgress},
+        exporting::export_collection_file,
         CollectionBuilder,
     },
     log::{self},
@@ -30,10 +31,7 @@ impl CollectionService for Backend {
     }
 
     fn open_collection(&self, input: pb::OpenCollectionRequest) -> Result<pb::Empty> {
-        let mut col = self.col.lock().unwrap();
-        if col.is_some() {
-            return Err(AnkiError::CollectionAlreadyOpen);
-        }
+        let mut guard = self.lock_closed_collection()?;
 
         let mut builder = CollectionBuilder::new(input.collection_path);
         builder
@@ -46,7 +44,7 @@
             builder.set_logger(self.log.clone());
         }
 
-        *col = Some(builder.build()?);
+        *guard = Some(builder.build()?);
 
         Ok(().into())
     }
@@ -54,12 +52,9 @@
     fn close_collection(&self, input: pb::CloseCollectionRequest) -> Result<pb::Empty> {
         self.abort_media_sync_and_wait();
 
-        let mut col = self.col.lock().unwrap();
-        if col.is_none() {
-            return Err(AnkiError::CollectionNotOpen);
-        }
+        let mut guard = self.lock_open_collection()?;
 
-        let mut col_inner = col.take().unwrap();
+        let mut col_inner = guard.take().unwrap();
         let limits = col_inner.get_backups();
         let col_path = std::mem::take(&mut col_inner.col_path);
@@ -82,30 +77,39 @@
         Ok(().into())
     }
 
-    fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result<pb::String> {
-        let col = self.col.lock().unwrap();
-        if col.is_some() {
-            Err(AnkiError::CollectionAlreadyOpen)
-        } else {
-            let mut handler = self.new_progress_handler();
-            let progress_fn = move |progress| {
-                let throttle = matches!(progress, ImportProgress::Media(_));
-                if handler.update(Progress::Import(progress), throttle) {
-                    Ok(())
-                } else {
-                    Err(AnkiError::Interrupted)
-                }
-            };
-
-            backup::restore_backup(
-                progress_fn,
-                &input.col_path,
-                &input.backup_path,
-                &input.media_folder,
-                &self.tr,
-            )
-            .map(Into::into)
-        }
+    fn export_collection(&self, input: pb::ExportCollectionRequest) -> Result<pb::Empty> {
+        self.abort_media_sync_and_wait();
+
+        let mut guard = self.lock_open_collection()?;
+
+        let col_inner = guard.take().unwrap();
+        let col_path = col_inner.col_path.clone();
+        let media_dir = input.include_media.then(|| col_inner.media_folder.clone());
+
+        col_inner.close(true)?;
+
+        export_collection_file(
+            input.out_path,
+            col_path,
+            media_dir,
+            input.legacy,
+            &self.tr,
+            self.export_progress_fn(),
+        )
+        .map(Into::into)
+    }
+
+    fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result<pb::String> {
+        let _guard = self.lock_closed_collection()?;
+
+        backup::restore_backup(
+            self.import_progress_fn(),
+            &input.col_path,
+            &input.backup_path,
+            &input.media_folder,
+            &self.tr,
+        )
+        .map(Into::into)
     }
 
     fn check_database(&self, _input: pb::Empty) -> Result<pb::CheckDatabaseResponse> {
@@ -150,6 +154,22 @@
 }
 
 impl Backend {
+    fn lock_open_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
+        let guard = self.col.lock().unwrap();
+        guard
+            .is_some()
+            .then(|| guard)
+            .ok_or(AnkiError::CollectionNotOpen)
+    }
+
+    fn lock_closed_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
+        let guard = self.col.lock().unwrap();
+        guard
+            .is_none()
+            .then(|| guard)
+            .ok_or(AnkiError::CollectionAlreadyOpen)
+    }
+
     fn await_backup_completion(&self) {
         if let Some(task) = self.backup_task.lock().unwrap().take() {
             task.join().unwrap();
@@ -170,8 +190,28 @@ impl Backend {
             limits,
             minimum_backup_interval,
             self.log.clone(),
+            self.tr.clone(),
         )?;
 
         Ok(())
     }
+
+    fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> {
+        let mut handler = self.new_progress_handler();
+        move |progress| {
+            let throttle = matches!(progress, ImportProgress::Media(_));
+            if handler.update(Progress::Import(progress), throttle) {
+                Ok(())
+            } else {
+                Err(AnkiError::Interrupted)
+            }
+        }
+    }
+
+    fn export_progress_fn(&self) -> impl FnMut(usize) {
+        let mut handler = self.new_progress_handler();
+        move |media_files| {
+            handler.update(Progress::Export(media_files), true);
+        }
+    }
 }


@@ -52,6 +52,7 @@ pub(super) enum Progress {
     NormalSync(NormalSyncProgress),
     DatabaseCheck(DatabaseCheckProgress),
     Import(ImportProgress),
+    Export(usize),
 }
 
 pub(super) fn progress_to_proto(progress: Option<Progress>, tr: &I18n) -> pb::Progress {
@@ -112,6 +113,7 @@ pub(super) fn progress_to_proto(progress: Option<Progress>, tr: &I18n) -> pb::Pr
                 }
                 .into(),
             ),
+            Progress::Export(progress) => pb::progress::Value::Exporting(progress as u32),
         }
     } else {
         pb::progress::Value::None(pb::Empty {})


@@ -5,7 +5,7 @@ use std::{
     collections::HashMap,
     ffi::OsStr,
     fs::{self, read_dir, remove_file, DirEntry, File},
-    io::{self, Read, Write},
+    io::{self, Write},
     path::{Path, PathBuf},
     thread::{self, JoinHandle},
     time::SystemTime,
@@ -14,32 +14,25 @@ use std::{
 use chrono::prelude::*;
 use itertools::Itertools;
 use log::error;
-use serde_derive::{Deserialize, Serialize};
 use tempfile::NamedTempFile;
-use zip::{write::FileOptions, CompressionMethod, ZipArchive, ZipWriter};
-use zstd::{self, stream::copy_decode, Encoder};
+use zip::ZipArchive;
+use zstd::{self, stream::copy_decode};
 
 use crate::{
-    backend_proto::preferences::Backups, collection::CollectionBuilder, error::ImportError, log,
-    prelude::*, text::normalize_to_nfc,
+    backend_proto::preferences::Backups,
+    collection::{
+        exporting::{export_collection_data, Meta, PACKAGE_VERSION},
+        CollectionBuilder,
+    },
+    error::ImportError,
+    log,
+    prelude::*,
+    text::normalize_to_nfc,
 };
 
-/// Bump if making changes that break restoring on older releases.
-const BACKUP_VERSION: u8 = 3;
 const BACKUP_FORMAT_STRING: &str = "backup-%Y-%m-%d-%H.%M.%S.colpkg";
 /// Default seconds after a backup, in which further backups will be skipped.
 const MINIMUM_BACKUP_INTERVAL: u64 = 5 * 60;
-/// Enable multithreaded compression if over this size. For smaller files,
-/// multithreading makes things slower, and in initial tests, the crossover
-/// point was somewhere between 1MB and 10MB on a many-core system.
-const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024;
-
-#[derive(Debug, Default, Serialize, Deserialize)]
-#[serde(default)]
-struct Meta {
-    #[serde(rename = "ver")]
-    version: u8,
-}
 
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum ImportProgress {
@@ -53,6 +46,7 @@ pub fn backup(
     limits: Backups,
     minimum_backup_interval: Option<u64>,
     log: Logger,
+    tr: I18n,
 ) -> Result<Option<JoinHandle<()>>> {
     let recent_secs = minimum_backup_interval.unwrap_or(MINIMUM_BACKUP_INTERVAL);
     if recent_secs > 0 && has_recent_backup(backup_folder.as_ref(), recent_secs)? {
@@ -60,7 +54,7 @@
     } else {
         let col_data = std::fs::read(col_path)?;
         Ok(Some(thread::spawn(move || {
-            backup_inner(&col_data, &backup_folder, limits, log)
+            backup_inner(&col_data, &backup_folder, limits, log, &tr)
         })))
     }
 }
@@ -99,7 +93,7 @@ pub fn restore_backup(
     progress_fn(ImportProgress::Collection)?;
     let mut result = String::new();
 
-    if let Err(e) = restore_media(progress_fn, &mut archive, media_folder) {
+    if let Err(e) = restore_media(meta, progress_fn, &mut archive, media_folder) {
         result = tr
             .importing_failed_to_import_media_file(e.localized_description(tr))
             .into_owned()
@@ -114,8 +108,14 @@
     Ok(result)
 }
 
-fn backup_inner<P: AsRef<Path>>(col_data: &[u8], backup_folder: P, limits: Backups, log: Logger) {
-    if let Err(error) = write_backup(col_data, backup_folder.as_ref()) {
+fn backup_inner<P: AsRef<Path>>(
+    col_data: &[u8],
+    backup_folder: P,
+    limits: Backups,
+    log: Logger,
+    tr: &I18n,
+) {
+    if let Err(error) = write_backup(col_data, backup_folder.as_ref(), tr) {
         error!(log, "failed to backup collection: {error:?}");
     }
     if let Err(error) = thin_backups(backup_folder, limits, &log) {
@@ -123,36 +123,10 @@ fn backup_inner<P: AsRef<Path>>(col_data: &[u8], backup_folder: P, limits: Backu
     }
 }
 
-fn write_backup<S: AsRef<OsStr>>(mut col_data: &[u8], backup_folder: S) -> Result<()> {
-    let out_file = File::create(out_path(backup_folder))?;
-    let mut zip = ZipWriter::new(out_file);
-    let options = FileOptions::default().compression_method(CompressionMethod::Stored);
-    let meta = serde_json::to_string(&Meta {
-        version: BACKUP_VERSION,
-    })
-    .unwrap();
-
-    zip.start_file("meta", options)?;
-    zip.write_all(meta.as_bytes())?;
-    zip.start_file("collection.anki21b", options)?;
-    let col_data_len = col_data.len();
-    zstd_copy(&mut col_data, &mut zip, col_data_len)?;
-    zip.start_file("media", options)?;
-    zip.write_all(b"{}")?;
-    zip.finish()?;
-
-    Ok(())
-}
-
-/// Copy contents of reader into writer, compressing as we copy.
-fn zstd_copy<R: Read, W: Write>(reader: &mut R, writer: &mut W, size: usize) -> Result<()> {
-    let mut encoder = Encoder::new(writer, 0)?;
-    if size > MULTITHREAD_MIN_BYTES {
-        encoder.multithread(num_cpus::get() as u32)?;
-    }
-    io::copy(reader, &mut encoder)?;
-    encoder.finish()?;
-    Ok(())
+fn write_backup<S: AsRef<OsStr>>(col_data: &[u8], backup_folder: S, tr: &I18n) -> Result<()> {
+    let out_path =
+        Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING)));
+    export_collection_data(&out_path, col_data, tr)
 }
 
 fn thin_backups<P: AsRef<Path>>(backup_folder: P, limits: Backups, log: &Logger) -> Result<()> {
@@ -168,10 +142,6 @@ fn thin_backups<P: AsRef<Path>>(backup_folder: P, limits: Backups, log: &Logger)
     Ok(())
 }
 
-fn out_path<S: AsRef<OsStr>>(backup_folder: S) -> PathBuf {
-    Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING)))
-}
-
 fn datetime_from_file_name(file_name: &str) -> Option<DateTime<Local>> {
     NaiveDateTime::parse_from_str(file_name, BACKUP_FORMAT_STRING)
         .ok()
@@ -319,7 +289,7 @@ impl Meta {
             .ok()
             .and_then(|file| serde_json::from_reader(file).ok())
             .unwrap_or_default();
-        if meta.version > BACKUP_VERSION {
+        if meta.version > PACKAGE_VERSION {
             return Err(AnkiError::ImportError(ImportError::TooNew));
         } else if meta.version == 0 {
             meta.version = if archive.by_name("collection.anki21").is_ok() {
@@ -331,14 +301,6 @@
 
         Ok(meta)
     }
-
-    fn collection_name(&self) -> &'static str {
-        match self.version {
-            1 => "collection.anki2",
-            2 => "collection.anki21",
-            _ => "collection.anki21b",
-        }
-    }
 }
 
 fn check_collection(col_path: &Path) -> Result<()> {
@@ -356,21 +318,22 @@ fn check_collection(col_path: &Path) -> Result<()> {
 }
 
 fn restore_media(
+    meta: Meta,
     mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
     archive: &mut ZipArchive<File>,
     media_folder: &str,
 ) -> Result<()> {
-    let media_file_names = extract_media_file_names(archive).ok_or(AnkiError::NotFound)?;
+    let media_file_names = extract_media_file_names(meta, archive)?;
     let mut count = 0;
 
-    for (archive_file_name, file_name) in media_file_names {
+    for (archive_file_name, file_name) in media_file_names.iter().enumerate() {
         count += 1;
         if count % 10 == 0 {
             progress_fn(ImportProgress::Media(count))?;
         }
 
-        if let Ok(mut zip_file) = archive.by_name(&archive_file_name) {
-            let file_path = Path::new(&media_folder).join(normalize_to_nfc(&file_name).as_ref());
+        if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
+            let file_path = Path::new(&media_folder).join(normalize_to_nfc(file_name).as_ref());
             let files_are_equal = fs::metadata(&file_path)
                 .map(|metadata| metadata.len() == zip_file.size())
                 .unwrap_or_default();
@@ -392,15 +355,20 @@ fn restore_media(
     Ok(())
 }
 
-fn extract_media_file_names(archive: &mut ZipArchive<File>) -> Option<HashMap<String, String>> {
-    archive
-        .by_name("media")
-        .ok()
-        .and_then(|mut file| {
-            let mut buf = Vec::new();
-            file.read_to_end(&mut buf).ok().map(|_| buf)
-        })
-        .and_then(|bytes| serde_json::from_slice(&bytes).ok())
+fn extract_media_file_names(meta: Meta, archive: &mut ZipArchive<File>) -> Result<Vec<String>> {
+    let mut file = archive.by_name("media")?;
+    let mut buf = Vec::new();
+    if meta.zstd_compressed() {
+        copy_decode(file, &mut buf)?;
+    } else {
+        io::copy(&mut file, &mut buf)?;
+    }
+    if meta.media_list_is_hashmap() {
+        let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
+        Ok(map.into_iter().map(|(_k, v)| v).collect())
+    } else {
+        serde_json::from_slice(&buf).map_err(Into::into)
+    }
 }
 
 fn copy_collection(
@@ -411,7 +379,7 @@ fn copy_collection(
     let mut file = archive
         .by_name(meta.collection_name())
        .map_err(|_| AnkiError::ImportError(ImportError::Corrupt))?;
-    if meta.version < 3 {
+    if !meta.zstd_compressed() {
         io::copy(&mut file, writer)?;
     } else {
         copy_decode(file, writer)?;


@@ -0,0 +1,291 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
collections::HashMap,
fs::{read_dir, DirEntry, File},
io::{self, Read, Write},
path::{Path, PathBuf},
};
use serde_derive::{Deserialize, Serialize};
use tempfile::NamedTempFile;
use zip::{write::FileOptions, CompressionMethod, ZipWriter};
use zstd::{
stream::{raw::Encoder as RawEncoder, zio::Writer},
Encoder,
};
use crate::{collection::CollectionBuilder, prelude::*, text::normalize_to_nfc};
/// Bump if making changes that break restoring on older releases.
pub const PACKAGE_VERSION: u8 = 3;
const COLLECTION_NAME: &str = "collection.anki21b";
const COLLECTION_NAME_V1: &str = "collection.anki2";
const COLLECTION_NAME_V2: &str = "collection.anki21";
/// Enable multithreaded compression if over this size. For smaller files,
/// multithreading makes things slower, and in initial tests, the crossover
/// point was somewhere between 1MB and 10MB on a many-core system.
const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024;
#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)]
#[serde(default)]
pub(super) struct Meta {
#[serde(rename = "ver")]
pub(super) version: u8,
}
impl Meta {
pub(super) fn new() -> Self {
Self {
version: PACKAGE_VERSION,
}
}
pub(super) fn new_v2() -> Self {
Self { version: 2 }
}
pub(super) fn collection_name(&self) -> &'static str {
match self.version {
1 => COLLECTION_NAME_V1,
2 => COLLECTION_NAME_V2,
_ => COLLECTION_NAME,
}
}
pub(super) fn zstd_compressed(&self) -> bool {
self.version >= 3
}
pub(super) fn media_list_is_hashmap(&self) -> bool {
self.version < 3
}
}
pub fn export_collection_file(
out_path: impl AsRef<Path>,
col_path: impl AsRef<Path>,
media_dir: Option<PathBuf>,
legacy: bool,
tr: &I18n,
progress_fn: impl FnMut(usize),
) -> Result<()> {
let meta = if legacy { Meta::new_v2() } else { Meta::new() };
let mut col_file = File::open(col_path)?;
let col_size = col_file.metadata()?.len() as usize;
export_collection(
meta,
out_path,
&mut col_file,
col_size,
media_dir,
tr,
progress_fn,
)
}
pub(crate) fn export_collection_data(
out_path: impl AsRef<Path>,
mut col_data: &[u8],
tr: &I18n,
) -> Result<()> {
let col_size = col_data.len();
export_collection(
Meta::new(),
out_path,
&mut col_data,
col_size,
None,
tr,
|_| (),
)
}
fn export_collection(
meta: Meta,
out_path: impl AsRef<Path>,
col: &mut impl Read,
col_size: usize,
media_dir: Option<PathBuf>,
tr: &I18n,
progress_fn: impl FnMut(usize),
) -> Result<()> {
let out_file = File::create(&out_path)?;
let mut zip = ZipWriter::new(out_file);
zip.start_file("meta", file_options_stored())?;
zip.write_all(serde_json::to_string(&meta).unwrap().as_bytes())?;
write_collection(meta, &mut zip, col, col_size)?;
write_dummy_collection(&mut zip, tr)?;
write_media(meta, &mut zip, media_dir, progress_fn)?;
zip.finish()?;
Ok(())
}
fn file_options_stored() -> FileOptions {
FileOptions::default().compression_method(CompressionMethod::Stored)
}
fn write_collection(
meta: Meta,
zip: &mut ZipWriter<File>,
col: &mut impl Read,
size: usize,
) -> Result<()> {
if meta.zstd_compressed() {
zip.start_file(meta.collection_name(), file_options_stored())?;
zstd_copy(col, zip, size)?;
} else {
zip.start_file(meta.collection_name(), FileOptions::default())?;
io::copy(col, zip)?;
}
Ok(())
}
fn write_dummy_collection(zip: &mut ZipWriter<File>, tr: &I18n) -> Result<()> {
let mut tempfile = create_dummy_collection_file(tr)?;
zip.start_file(COLLECTION_NAME_V1, file_options_stored())?;
io::copy(&mut tempfile, zip)?;
Ok(())
}
fn create_dummy_collection_file(tr: &I18n) -> Result<NamedTempFile> {
let tempfile = NamedTempFile::new()?;
let mut dummy_col = CollectionBuilder::new(tempfile.path()).build()?;
dummy_col.add_dummy_note(tr)?;
dummy_col
.storage
.db
.execute_batch("pragma page_size=512; pragma journal_mode=delete; vacuum;")?;
dummy_col.close(true)?;
Ok(tempfile)
}
impl Collection {
fn add_dummy_note(&mut self, tr: &I18n) -> Result<()> {
let notetype = self.get_notetype_by_name("basic")?.unwrap();
let mut note = notetype.new_note();
note.set_field(0, tr.exporting_colpkg_too_new())?;
self.add_note(&mut note, DeckId(1))?;
Ok(())
}
}
/// Copy contents of reader into writer, compressing as we copy.
fn zstd_copy(reader: &mut impl Read, writer: &mut impl Write, size: usize) -> Result<()> {
let mut encoder = Encoder::new(writer, 0)?;
if size > MULTITHREAD_MIN_BYTES {
encoder.multithread(num_cpus::get() as u32)?;
}
io::copy(reader, &mut encoder)?;
encoder.finish()?;
Ok(())
}
fn write_media(
meta: Meta,
zip: &mut ZipWriter<File>,
media_dir: Option<PathBuf>,
progress_fn: impl FnMut(usize),
) -> Result<()> {
let mut media_names = vec![];
if let Some(media_dir) = media_dir {
write_media_files(meta, zip, &media_dir, &mut media_names, progress_fn)?;
}
write_media_map(meta, &media_names, zip)?;
Ok(())
}
fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter<File>) -> Result<()> {
zip.start_file("media", file_options_stored())?;
let json_bytes = if meta.media_list_is_hashmap() {
let map: HashMap<String, &str> = media_names
.iter()
.enumerate()
.map(|(k, v)| (k.to_string(), v.as_str()))
.collect();
serde_json::to_vec(&map)?
} else {
serde_json::to_vec(media_names)?
};
let size = json_bytes.len();
let mut cursor = std::io::Cursor::new(json_bytes);
if meta.zstd_compressed() {
zstd_copy(&mut cursor, zip, size)?;
} else {
io::copy(&mut cursor, zip)?;
}
Ok(())
}
fn write_media_files(
meta: Meta,
zip: &mut ZipWriter<File>,
dir: &Path,
names: &mut Vec<String>,
mut progress_fn: impl FnMut(usize),
) -> Result<()> {
let mut writer = MediaFileWriter::new(meta);
let mut index = 0;
for entry in read_dir(dir)? {
let entry = entry?;
if !entry.metadata()?.is_file() {
continue;
}
progress_fn(index);
names.push(normalized_unicode_file_name(&entry)?);
zip.start_file(index.to_string(), file_options_stored())?;
writer = writer.write(&mut File::open(entry.path())?, zip)?;
// can't enumerate(), as we skip folders
index += 1;
}
Ok(())
}
fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
entry
.file_name()
.to_str()
.map(|name| normalize_to_nfc(name).into())
.ok_or_else(|| {
AnkiError::IoError(format!(
"non-unicode file name: {}",
entry.file_name().to_string_lossy()
))
})
}
/// Writes media files while compressing according to the targeted version.
/// If compressing, the encoder is reused to optimize for repeated calls.
struct MediaFileWriter(Option<RawEncoder<'static>>);
impl MediaFileWriter {
fn new(meta: Meta) -> Self {
Self(
meta.zstd_compressed()
.then(|| RawEncoder::with_dictionary(0, &[]).unwrap()),
)
}
fn write(mut self, reader: &mut impl Read, writer: &mut impl Write) -> Result<Self> {
// take [self] by value to prevent it from being reused after an error
if let Some(encoder) = self.0.take() {
let mut encoder_writer = Writer::new(writer, encoder);
io::copy(reader, &mut encoder_writer)?;
encoder_writer.finish()?;
self.0 = Some(encoder_writer.into_inner().1);
} else {
io::copy(reader, writer)?;
}
Ok(self)
}
}


@@ -2,6 +2,7 @@
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
 pub mod backup;
+pub mod exporting;
 pub(crate) mod timestamps;
 mod transact;
 pub(crate) mod undo;