Backend colpkg exporting (#1719)
* Implement colpkg exporting on backend
* Use exporting logic in backup.rs
* Refactor exporting.rs
* Add backend function to export collection
* Refactor backend/collection.rs
* Use backend for colpkg exporting
* Don't use default zip compression for media
* Add exporting progress
* Refactor media file writing
* Write dummy collections
* Localize dummy collection note
* Minimize dummy db size
* Use `NamedTempFile::new()` instead of `new_in`
* Drop redundant v2 dummy collection
* COLLECTION_VERSION -> PACKAGE_VERSION
* Split `lock_collection()` into two to drop flag
* Expose new colpkg in GUI
* Improve dummy collection message
* Please type checker
* importing-colpkg-too-new -> exporting-...
* Compress the media map in the v3 package (dae)

  On collections with lots of media, it can grow into megabytes. Also return an error in extract_media_file_names(), instead of masking it as an optional.

* Store media map as a vector in the v3 package (dae)

  This compresses better (eg 280kb original, 100kb hashmap, 42kb vec). In the colpkg import case we don't need random access. When importing an apkg, we will need to be able to fetch file data for a given media filename, but the existing map doesn't help us there, as we need filename->index, not index->filename.

* Ensure folders in the media dir don't break the file mapping (dae)
This commit is contained in:
parent
d7a101827a
commit
e759885734
9 changed files with 463 additions and 130 deletions
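
The media-map change described in the commit message can be illustrated with a small standalone sketch (not part of the commit; it assumes the serde_json crate, and the file names are made up): legacy packages store a zip-index-to-filename hashmap, while the v3 package stores a plain vector whose position doubles as the zip entry name, which serializes smaller and compresses better.

use std::collections::HashMap;

fn main() -> serde_json::Result<()> {
    // Hypothetical media file names, for illustration only.
    let media_names = vec!["a.jpg".to_string(), "b.mp3".to_string()];

    // Legacy (pre-v3) form: zip entry index -> original file name.
    let legacy_map: HashMap<String, &str> = media_names
        .iter()
        .enumerate()
        .map(|(index, name)| (index.to_string(), name.as_str()))
        .collect();

    // v3 form: a plain vector; an entry's position is its name in the zip.
    println!("legacy: {}", serde_json::to_string(&legacy_map)?);
    println!("v3:     {}", serde_json::to_string(&media_names)?);
    Ok(())
}
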
@@ -5,6 +5,7 @@ exporting-anki-deck-package = Anki Deck Package
exporting-cards-in-plain-text = Cards in Plain Text
exporting-collection = collection
exporting-collection-exported = Collection exported.
exporting-colpkg-too-new = Please update to the latest Anki version, then import the .colpkg file again.
exporting-couldnt-save-file = Couldn't save file: { $val }
exporting-export = Export...
exporting-export-format = <b>Export format</b>:

@@ -20,6 +20,7 @@ service CollectionService {
  rpc LatestProgress(generic.Empty) returns (Progress);
  rpc SetWantsAbort(generic.Empty) returns (generic.Empty);
  rpc AwaitBackupCompletion(generic.Empty) returns (generic.Empty);
  rpc ExportCollection(ExportCollectionRequest) returns (generic.Empty);
}

message OpenCollectionRequest {

@@ -121,5 +122,12 @@ message Progress {
    NormalSync normal_sync = 5;
    DatabaseCheck database_check = 6;
    string importing = 7;
    uint32 exporting = 8;
  }
}

message ExportCollectionRequest {
  string out_path = 1;
  bool include_media = 2;
  bool legacy = 3;
}

@@ -264,6 +264,14 @@ class Collection(DeprecatedNamesMixin):
        self._clear_caches()
        self.db = None

    def export_collection(
        self, out_path: str, include_media: bool, legacy: bool
    ) -> None:
        self.close_for_full_sync()
        self._backend.export_collection(
            out_path=out_path, include_media=include_media, legacy=legacy
        )

    def rollback(self) -> None:
        self._clear_caches()
        self.db.rollback()

@@ -9,6 +9,8 @@ import json
import os
import re
import shutil
import threading
import time
import unicodedata
import zipfile
from io import BufferedWriter

@@ -419,6 +421,7 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter):
    ext = ".colpkg"
    verbatim = True
    includeSched = None
    LEGACY = True

    def __init__(self, col):
        AnkiPackageExporter.__init__(self, col)

@@ -427,22 +430,32 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter):
    def key(col: Collection) -> str:
        return col.tr.exporting_anki_collection_package()

    def doExport(self, z, path):
        "Export collection. Caller must re-open afterwards."
        # close our deck & write it into the zip file
        self.count = self.col.card_count()
        v2 = self.col.sched_ver() != 1
        mdir = self.col.media.dir()
        self.col.close(downgrade=True)
        if not v2:
            z.write(self.col.path, "collection.anki2")
        else:
            self._addDummyCollection(z)
            z.write(self.col.path, "collection.anki21")
        # copy all media
        if not self.includeMedia:
            return {}
        return self._exportMedia(z, os.listdir(mdir), mdir)
    def exportInto(self, path: str) -> None:
        """Export collection. Caller must re-open afterwards."""

        def exporting_media() -> bool:
            return any(
                hook.__name__ == "exported_media"
                for hook in hooks.media_files_did_export._hooks
            )

        def progress() -> None:
            while exporting_media():
                progress = self.col._backend.latest_progress()
                if progress.HasField("exporting"):
                    hooks.media_files_did_export(progress.exporting)
                time.sleep(0.1)

        threading.Thread(target=progress).start()
        self.col.export_collection(path, self.includeMedia, self.LEGACY)


class AnkiCollectionPackage21bExporter(AnkiCollectionPackageExporter):
    LEGACY = False

    @staticmethod
    def key(_col: Collection) -> str:
        return "Anki 2.1.50+ Collection Package"


# Export modules

@@ -459,6 +472,7 @@ def exporters(col: Collection) -> list[tuple[str, Any]]:

    exps = [
        id(AnkiCollectionPackageExporter),
        id(AnkiCollectionPackage21bExporter),
        id(AnkiPackageExporter),
        id(TextNoteExporter),
        id(TextCardExporter),

@@ -1,7 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

use std::path::Path;
use std::{path::Path, sync::MutexGuard};

use slog::error;

@@ -12,6 +12,7 @@ use crate::{
    backend_proto::{self as pb, preferences::Backups},
    collection::{
        backup::{self, ImportProgress},
        exporting::export_collection_file,
        CollectionBuilder,
    },
    log::{self},

@@ -30,10 +31,7 @@ impl CollectionService for Backend {
    }

    fn open_collection(&self, input: pb::OpenCollectionRequest) -> Result<pb::Empty> {
        let mut col = self.col.lock().unwrap();
        if col.is_some() {
            return Err(AnkiError::CollectionAlreadyOpen);
        }
        let mut guard = self.lock_closed_collection()?;

        let mut builder = CollectionBuilder::new(input.collection_path);
        builder

@@ -46,7 +44,7 @@ impl CollectionService for Backend {
            builder.set_logger(self.log.clone());
        }

        *col = Some(builder.build()?);
        *guard = Some(builder.build()?);

        Ok(().into())
    }

@@ -54,12 +52,9 @@ impl CollectionService for Backend {
    fn close_collection(&self, input: pb::CloseCollectionRequest) -> Result<pb::Empty> {
        self.abort_media_sync_and_wait();

        let mut col = self.col.lock().unwrap();
        if col.is_none() {
            return Err(AnkiError::CollectionNotOpen);
        }
        let mut guard = self.lock_open_collection()?;

        let mut col_inner = col.take().unwrap();
        let mut col_inner = guard.take().unwrap();
        let limits = col_inner.get_backups();
        let col_path = std::mem::take(&mut col_inner.col_path);

@@ -82,30 +77,39 @@ impl CollectionService for Backend {
        Ok(().into())
    }

    fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result<pb::String> {
        let col = self.col.lock().unwrap();
        if col.is_some() {
            Err(AnkiError::CollectionAlreadyOpen)
        } else {
            let mut handler = self.new_progress_handler();
            let progress_fn = move |progress| {
                let throttle = matches!(progress, ImportProgress::Media(_));
                if handler.update(Progress::Import(progress), throttle) {
                    Ok(())
                } else {
                    Err(AnkiError::Interrupted)
                }
            };
    fn export_collection(&self, input: pb::ExportCollectionRequest) -> Result<pb::Empty> {
        self.abort_media_sync_and_wait();

            backup::restore_backup(
                progress_fn,
                &input.col_path,
                &input.backup_path,
                &input.media_folder,
                &self.tr,
            )
            .map(Into::into)
        }
        let mut guard = self.lock_open_collection()?;

        let col_inner = guard.take().unwrap();
        let col_path = col_inner.col_path.clone();
        let media_dir = input.include_media.then(|| col_inner.media_folder.clone());

        col_inner.close(true)?;

        export_collection_file(
            input.out_path,
            col_path,
            media_dir,
            input.legacy,
            &self.tr,
            self.export_progress_fn(),
        )
        .map(Into::into)
    }

    fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result<pb::String> {
        let _guard = self.lock_closed_collection()?;

        backup::restore_backup(
            self.import_progress_fn(),
            &input.col_path,
            &input.backup_path,
            &input.media_folder,
            &self.tr,
        )
        .map(Into::into)
    }

    fn check_database(&self, _input: pb::Empty) -> Result<pb::CheckDatabaseResponse> {

@@ -150,6 +154,22 @@ impl CollectionService for Backend {
}

impl Backend {
    fn lock_open_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
        let guard = self.col.lock().unwrap();
        guard
            .is_some()
            .then(|| guard)
            .ok_or(AnkiError::CollectionNotOpen)
    }

    fn lock_closed_collection(&self) -> Result<MutexGuard<Option<Collection>>> {
        let guard = self.col.lock().unwrap();
        guard
            .is_none()
            .then(|| guard)
            .ok_or(AnkiError::CollectionAlreadyOpen)
    }

    fn await_backup_completion(&self) {
        if let Some(task) = self.backup_task.lock().unwrap().take() {
            task.join().unwrap();

@@ -170,8 +190,28 @@ impl Backend {
            limits,
            minimum_backup_interval,
            self.log.clone(),
            self.tr.clone(),
        )?;

        Ok(())
    }

    fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> {
        let mut handler = self.new_progress_handler();
        move |progress| {
            let throttle = matches!(progress, ImportProgress::Media(_));
            if handler.update(Progress::Import(progress), throttle) {
                Ok(())
            } else {
                Err(AnkiError::Interrupted)
            }
        }
    }

    fn export_progress_fn(&self) -> impl FnMut(usize) {
        let mut handler = self.new_progress_handler();
        move |media_files| {
            handler.update(Progress::Export(media_files), true);
        }
    }
}

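The two lock helpers added above replace ad-hoc is_some()/is_none() checks at each call site. A minimal standalone sketch of the same pattern (stub Collection and error types stand in for the real ones; this is an illustration under those assumptions, not the backend code itself) shows how bool::then turns the open/closed check into a Result that still carries the mutex guard:

use std::sync::{Mutex, MutexGuard};

// Stubs standing in for the real types; the actual backend has richer variants.
struct Collection;

#[derive(Debug)]
enum AnkiError {
    CollectionNotOpen,
    CollectionAlreadyOpen,
}

struct Backend {
    col: Mutex<Option<Collection>>,
}

impl Backend {
    // Succeeds only if a collection is currently open, returning the guard.
    fn lock_open_collection(&self) -> Result<MutexGuard<'_, Option<Collection>>, AnkiError> {
        let guard = self.col.lock().unwrap();
        guard.is_some().then(|| guard).ok_or(AnkiError::CollectionNotOpen)
    }

    // Succeeds only if no collection is open, e.g. before opening or restoring.
    fn lock_closed_collection(&self) -> Result<MutexGuard<'_, Option<Collection>>, AnkiError> {
        let guard = self.col.lock().unwrap();
        guard.is_none().then(|| guard).ok_or(AnkiError::CollectionAlreadyOpen)
    }
}

fn main() {
    let backend = Backend { col: Mutex::new(None) };
    assert!(backend.lock_open_collection().is_err());
    assert!(backend.lock_closed_collection().is_ok());
}
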
@@ -52,6 +52,7 @@ pub(super) enum Progress {
    NormalSync(NormalSyncProgress),
    DatabaseCheck(DatabaseCheckProgress),
    Import(ImportProgress),
    Export(usize),
}

pub(super) fn progress_to_proto(progress: Option<Progress>, tr: &I18n) -> pb::Progress {

@@ -112,6 +113,7 @@ pub(super) fn progress_to_proto(progress: Option<Progress>, tr: &I18n) -> pb::Pr
                }
                .into(),
            ),
            Progress::Export(progress) => pb::progress::Value::Exporting(progress as u32),
        }
    } else {
        pb::progress::Value::None(pb::Empty {})

@@ -5,7 +5,7 @@ use std::{
    collections::HashMap,
    ffi::OsStr,
    fs::{self, read_dir, remove_file, DirEntry, File},
    io::{self, Read, Write},
    io::{self, Write},
    path::{Path, PathBuf},
    thread::{self, JoinHandle},
    time::SystemTime,

@@ -14,32 +14,25 @@ use std::{
use chrono::prelude::*;
use itertools::Itertools;
use log::error;
use serde_derive::{Deserialize, Serialize};
use tempfile::NamedTempFile;
use zip::{write::FileOptions, CompressionMethod, ZipArchive, ZipWriter};
use zstd::{self, stream::copy_decode, Encoder};
use zip::ZipArchive;
use zstd::{self, stream::copy_decode};

use crate::{
    backend_proto::preferences::Backups, collection::CollectionBuilder, error::ImportError, log,
    prelude::*, text::normalize_to_nfc,
    backend_proto::preferences::Backups,
    collection::{
        exporting::{export_collection_data, Meta, PACKAGE_VERSION},
        CollectionBuilder,
    },
    error::ImportError,
    log,
    prelude::*,
    text::normalize_to_nfc,
};

/// Bump if making changes that break restoring on older releases.
const BACKUP_VERSION: u8 = 3;
const BACKUP_FORMAT_STRING: &str = "backup-%Y-%m-%d-%H.%M.%S.colpkg";
/// Default seconds after a backup, in which further backups will be skipped.
const MINIMUM_BACKUP_INTERVAL: u64 = 5 * 60;
/// Enable multithreaded compression if over this size. For smaller files,
/// multithreading makes things slower, and in initial tests, the crossover
/// point was somewhere between 1MB and 10MB on a many-core system.
const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024;

#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(default)]
struct Meta {
    #[serde(rename = "ver")]
    version: u8,
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ImportProgress {

@@ -53,6 +46,7 @@ pub fn backup(
    limits: Backups,
    minimum_backup_interval: Option<u64>,
    log: Logger,
    tr: I18n,
) -> Result<Option<JoinHandle<()>>> {
    let recent_secs = minimum_backup_interval.unwrap_or(MINIMUM_BACKUP_INTERVAL);
    if recent_secs > 0 && has_recent_backup(backup_folder.as_ref(), recent_secs)? {

@@ -60,7 +54,7 @@ pub fn backup(
    } else {
        let col_data = std::fs::read(col_path)?;
        Ok(Some(thread::spawn(move || {
            backup_inner(&col_data, &backup_folder, limits, log)
            backup_inner(&col_data, &backup_folder, limits, log, &tr)
        })))
    }
}

@@ -99,7 +93,7 @@ pub fn restore_backup(
    progress_fn(ImportProgress::Collection)?;

    let mut result = String::new();
    if let Err(e) = restore_media(progress_fn, &mut archive, media_folder) {
    if let Err(e) = restore_media(meta, progress_fn, &mut archive, media_folder) {
        result = tr
            .importing_failed_to_import_media_file(e.localized_description(tr))
            .into_owned()

@@ -114,8 +108,14 @@ pub fn restore_backup(
    Ok(result)
}

fn backup_inner<P: AsRef<Path>>(col_data: &[u8], backup_folder: P, limits: Backups, log: Logger) {
    if let Err(error) = write_backup(col_data, backup_folder.as_ref()) {
fn backup_inner<P: AsRef<Path>>(
    col_data: &[u8],
    backup_folder: P,
    limits: Backups,
    log: Logger,
    tr: &I18n,
) {
    if let Err(error) = write_backup(col_data, backup_folder.as_ref(), tr) {
        error!(log, "failed to backup collection: {error:?}");
    }
    if let Err(error) = thin_backups(backup_folder, limits, &log) {

@@ -123,36 +123,10 @@ fn backup_inner<P: AsRef<Path>>(col_data: &[u8], backup_folder: P, limits: Backu
    }
}

fn write_backup<S: AsRef<OsStr>>(mut col_data: &[u8], backup_folder: S) -> Result<()> {
    let out_file = File::create(out_path(backup_folder))?;
    let mut zip = ZipWriter::new(out_file);
    let options = FileOptions::default().compression_method(CompressionMethod::Stored);
    let meta = serde_json::to_string(&Meta {
        version: BACKUP_VERSION,
    })
    .unwrap();

    zip.start_file("meta", options)?;
    zip.write_all(meta.as_bytes())?;
    zip.start_file("collection.anki21b", options)?;
    let col_data_len = col_data.len();
    zstd_copy(&mut col_data, &mut zip, col_data_len)?;
    zip.start_file("media", options)?;
    zip.write_all(b"{}")?;
    zip.finish()?;

    Ok(())
}

/// Copy contents of reader into writer, compressing as we copy.
fn zstd_copy<R: Read, W: Write>(reader: &mut R, writer: &mut W, size: usize) -> Result<()> {
    let mut encoder = Encoder::new(writer, 0)?;
    if size > MULTITHREAD_MIN_BYTES {
        encoder.multithread(num_cpus::get() as u32)?;
    }
    io::copy(reader, &mut encoder)?;
    encoder.finish()?;
    Ok(())
fn write_backup<S: AsRef<OsStr>>(col_data: &[u8], backup_folder: S, tr: &I18n) -> Result<()> {
    let out_path =
        Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING)));
    export_collection_data(&out_path, col_data, tr)
}

fn thin_backups<P: AsRef<Path>>(backup_folder: P, limits: Backups, log: &Logger) -> Result<()> {

@@ -168,10 +142,6 @@ fn thin_backups<P: AsRef<Path>>(backup_folder: P, limits: Backups, log: &Logger)
    Ok(())
}

fn out_path<S: AsRef<OsStr>>(backup_folder: S) -> PathBuf {
    Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING)))
}

fn datetime_from_file_name(file_name: &str) -> Option<DateTime<Local>> {
    NaiveDateTime::parse_from_str(file_name, BACKUP_FORMAT_STRING)
        .ok()

@@ -319,7 +289,7 @@ impl Meta {
            .ok()
            .and_then(|file| serde_json::from_reader(file).ok())
            .unwrap_or_default();
        if meta.version > BACKUP_VERSION {
        if meta.version > PACKAGE_VERSION {
            return Err(AnkiError::ImportError(ImportError::TooNew));
        } else if meta.version == 0 {
            meta.version = if archive.by_name("collection.anki21").is_ok() {

@@ -331,14 +301,6 @@ impl Meta {

        Ok(meta)
    }

    fn collection_name(&self) -> &'static str {
        match self.version {
            1 => "collection.anki2",
            2 => "collection.anki21",
            _ => "collection.anki21b",
        }
    }
}

fn check_collection(col_path: &Path) -> Result<()> {

@@ -356,21 +318,22 @@ fn check_collection(col_path: &Path) -> Result<()> {
}

fn restore_media(
    meta: Meta,
    mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
    archive: &mut ZipArchive<File>,
    media_folder: &str,
) -> Result<()> {
    let media_file_names = extract_media_file_names(archive).ok_or(AnkiError::NotFound)?;
    let media_file_names = extract_media_file_names(meta, archive)?;
    let mut count = 0;

    for (archive_file_name, file_name) in media_file_names {
    for (archive_file_name, file_name) in media_file_names.iter().enumerate() {
        count += 1;
        if count % 10 == 0 {
            progress_fn(ImportProgress::Media(count))?;
        }

        if let Ok(mut zip_file) = archive.by_name(&archive_file_name) {
            let file_path = Path::new(&media_folder).join(normalize_to_nfc(&file_name).as_ref());
        if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
            let file_path = Path::new(&media_folder).join(normalize_to_nfc(file_name).as_ref());
            let files_are_equal = fs::metadata(&file_path)
                .map(|metadata| metadata.len() == zip_file.size())
                .unwrap_or_default();

@@ -392,15 +355,20 @@ fn restore_media(
    Ok(())
}

fn extract_media_file_names(archive: &mut ZipArchive<File>) -> Option<HashMap<String, String>> {
    archive
        .by_name("media")
        .ok()
        .and_then(|mut file| {
            let mut buf = Vec::new();
            file.read_to_end(&mut buf).ok().map(|_| buf)
        })
        .and_then(|bytes| serde_json::from_slice(&bytes).ok())
fn extract_media_file_names(meta: Meta, archive: &mut ZipArchive<File>) -> Result<Vec<String>> {
    let mut file = archive.by_name("media")?;
    let mut buf = Vec::new();
    if meta.zstd_compressed() {
        copy_decode(file, &mut buf)?;
    } else {
        io::copy(&mut file, &mut buf)?;
    }
    if meta.media_list_is_hashmap() {
        let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
        Ok(map.into_iter().map(|(_k, v)| v).collect())
    } else {
        serde_json::from_slice(&buf).map_err(Into::into)
    }
}

fn copy_collection(

@@ -411,7 +379,7 @@ fn copy_collection(
    let mut file = archive
        .by_name(meta.collection_name())
        .map_err(|_| AnkiError::ImportError(ImportError::Corrupt))?;
    if meta.version < 3 {
    if !meta.zstd_compressed() {
        io::copy(&mut file, writer)?;
    } else {
        copy_decode(file, writer)?;

rslib/src/collection/exporting.rs (new file, 291 lines)

@@ -0,0 +1,291 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

use std::{
    collections::HashMap,
    fs::{read_dir, DirEntry, File},
    io::{self, Read, Write},
    path::{Path, PathBuf},
};

use serde_derive::{Deserialize, Serialize};
use tempfile::NamedTempFile;
use zip::{write::FileOptions, CompressionMethod, ZipWriter};
use zstd::{
    stream::{raw::Encoder as RawEncoder, zio::Writer},
    Encoder,
};

use crate::{collection::CollectionBuilder, prelude::*, text::normalize_to_nfc};

/// Bump if making changes that break restoring on older releases.
pub const PACKAGE_VERSION: u8 = 3;
const COLLECTION_NAME: &str = "collection.anki21b";
const COLLECTION_NAME_V1: &str = "collection.anki2";
const COLLECTION_NAME_V2: &str = "collection.anki21";
/// Enable multithreaded compression if over this size. For smaller files,
/// multithreading makes things slower, and in initial tests, the crossover
/// point was somewhere between 1MB and 10MB on a many-core system.
const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024;

#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)]
#[serde(default)]
pub(super) struct Meta {
    #[serde(rename = "ver")]
    pub(super) version: u8,
}

impl Meta {
    pub(super) fn new() -> Self {
        Self {
            version: PACKAGE_VERSION,
        }
    }

    pub(super) fn new_v2() -> Self {
        Self { version: 2 }
    }

    pub(super) fn collection_name(&self) -> &'static str {
        match self.version {
            1 => COLLECTION_NAME_V1,
            2 => COLLECTION_NAME_V2,
            _ => COLLECTION_NAME,
        }
    }

    pub(super) fn zstd_compressed(&self) -> bool {
        self.version >= 3
    }

    pub(super) fn media_list_is_hashmap(&self) -> bool {
        self.version < 3
    }
}

pub fn export_collection_file(
    out_path: impl AsRef<Path>,
    col_path: impl AsRef<Path>,
    media_dir: Option<PathBuf>,
    legacy: bool,
    tr: &I18n,
    progress_fn: impl FnMut(usize),
) -> Result<()> {
    let meta = if legacy { Meta::new_v2() } else { Meta::new() };
    let mut col_file = File::open(col_path)?;
    let col_size = col_file.metadata()?.len() as usize;
    export_collection(
        meta,
        out_path,
        &mut col_file,
        col_size,
        media_dir,
        tr,
        progress_fn,
    )
}

pub(crate) fn export_collection_data(
    out_path: impl AsRef<Path>,
    mut col_data: &[u8],
    tr: &I18n,
) -> Result<()> {
    let col_size = col_data.len();
    export_collection(
        Meta::new(),
        out_path,
        &mut col_data,
        col_size,
        None,
        tr,
        |_| (),
    )
}

fn export_collection(
    meta: Meta,
    out_path: impl AsRef<Path>,
    col: &mut impl Read,
    col_size: usize,
    media_dir: Option<PathBuf>,
    tr: &I18n,
    progress_fn: impl FnMut(usize),
) -> Result<()> {
    let out_file = File::create(&out_path)?;
    let mut zip = ZipWriter::new(out_file);

    zip.start_file("meta", file_options_stored())?;
    zip.write_all(serde_json::to_string(&meta).unwrap().as_bytes())?;
    write_collection(meta, &mut zip, col, col_size)?;
    write_dummy_collection(&mut zip, tr)?;
    write_media(meta, &mut zip, media_dir, progress_fn)?;
    zip.finish()?;

    Ok(())
}

fn file_options_stored() -> FileOptions {
    FileOptions::default().compression_method(CompressionMethod::Stored)
}

fn write_collection(
    meta: Meta,
    zip: &mut ZipWriter<File>,
    col: &mut impl Read,
    size: usize,
) -> Result<()> {
    if meta.zstd_compressed() {
        zip.start_file(meta.collection_name(), file_options_stored())?;
        zstd_copy(col, zip, size)?;
    } else {
        zip.start_file(meta.collection_name(), FileOptions::default())?;
        io::copy(col, zip)?;
    }
    Ok(())
}

fn write_dummy_collection(zip: &mut ZipWriter<File>, tr: &I18n) -> Result<()> {
    let mut tempfile = create_dummy_collection_file(tr)?;
    zip.start_file(COLLECTION_NAME_V1, file_options_stored())?;
    io::copy(&mut tempfile, zip)?;

    Ok(())
}

fn create_dummy_collection_file(tr: &I18n) -> Result<NamedTempFile> {
    let tempfile = NamedTempFile::new()?;
    let mut dummy_col = CollectionBuilder::new(tempfile.path()).build()?;
    dummy_col.add_dummy_note(tr)?;
    dummy_col
        .storage
        .db
        .execute_batch("pragma page_size=512; pragma journal_mode=delete; vacuum;")?;
    dummy_col.close(true)?;

    Ok(tempfile)
}

impl Collection {
    fn add_dummy_note(&mut self, tr: &I18n) -> Result<()> {
        let notetype = self.get_notetype_by_name("basic")?.unwrap();
        let mut note = notetype.new_note();
        note.set_field(0, tr.exporting_colpkg_too_new())?;
        self.add_note(&mut note, DeckId(1))?;
        Ok(())
    }
}

/// Copy contents of reader into writer, compressing as we copy.
fn zstd_copy(reader: &mut impl Read, writer: &mut impl Write, size: usize) -> Result<()> {
    let mut encoder = Encoder::new(writer, 0)?;
    if size > MULTITHREAD_MIN_BYTES {
        encoder.multithread(num_cpus::get() as u32)?;
    }
    io::copy(reader, &mut encoder)?;
    encoder.finish()?;
    Ok(())
}

fn write_media(
    meta: Meta,
    zip: &mut ZipWriter<File>,
    media_dir: Option<PathBuf>,
    progress_fn: impl FnMut(usize),
) -> Result<()> {
    let mut media_names = vec![];

    if let Some(media_dir) = media_dir {
        write_media_files(meta, zip, &media_dir, &mut media_names, progress_fn)?;
    }

    write_media_map(meta, &media_names, zip)?;

    Ok(())
}

fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter<File>) -> Result<()> {
    zip.start_file("media", file_options_stored())?;
    let json_bytes = if meta.media_list_is_hashmap() {
        let map: HashMap<String, &str> = media_names
            .iter()
            .enumerate()
            .map(|(k, v)| (k.to_string(), v.as_str()))
            .collect();
        serde_json::to_vec(&map)?
    } else {
        serde_json::to_vec(media_names)?
    };
    let size = json_bytes.len();
    let mut cursor = std::io::Cursor::new(json_bytes);
    if meta.zstd_compressed() {
        zstd_copy(&mut cursor, zip, size)?;
    } else {
        io::copy(&mut cursor, zip)?;
    }
    Ok(())
}

fn write_media_files(
    meta: Meta,
    zip: &mut ZipWriter<File>,
    dir: &Path,
    names: &mut Vec<String>,
    mut progress_fn: impl FnMut(usize),
) -> Result<()> {
    let mut writer = MediaFileWriter::new(meta);
    let mut index = 0;
    for entry in read_dir(dir)? {
        let entry = entry?;
        if !entry.metadata()?.is_file() {
            continue;
        }
        progress_fn(index);
        names.push(normalized_unicode_file_name(&entry)?);
        zip.start_file(index.to_string(), file_options_stored())?;
        writer = writer.write(&mut File::open(entry.path())?, zip)?;
        // can't enumerate(), as we skip folders
        index += 1;
    }

    Ok(())
}

fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
    entry
        .file_name()
        .to_str()
        .map(|name| normalize_to_nfc(name).into())
        .ok_or_else(|| {
            AnkiError::IoError(format!(
                "non-unicode file name: {}",
                entry.file_name().to_string_lossy()
            ))
        })
}

/// Writes media files while compressing according to the targeted version.
/// If compressing, the encoder is reused to optimize for repeated calls.
struct MediaFileWriter(Option<RawEncoder<'static>>);

impl MediaFileWriter {
    fn new(meta: Meta) -> Self {
        Self(
            meta.zstd_compressed()
                .then(|| RawEncoder::with_dictionary(0, &[]).unwrap()),
        )
    }

    fn write(mut self, reader: &mut impl Read, writer: &mut impl Write) -> Result<Self> {
        // take [self] by value to prevent it from being reused after an error
        if let Some(encoder) = self.0.take() {
            let mut encoder_writer = Writer::new(writer, encoder);
            io::copy(reader, &mut encoder_writer)?;
            encoder_writer.finish()?;
            self.0 = Some(encoder_writer.into_inner().1);
        } else {
            io::copy(reader, writer)?;
        }

        Ok(self)
    }
}

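For context on the backwards-compatibility story in the file above: the "meta" zip entry carries a version byte, newer clients pick the collection entry name and compression from it, and older clients, which only know collection.anki2/anki21, open the dummy collection containing the localized "please update" note instead. A small standalone sketch of that version gating, mirroring the Meta methods above (runnable on its own; it restates the logic rather than importing it):

// Mirrors Meta::collection_name(), zstd_compressed() and media_list_is_hashmap().
#[derive(Clone, Copy)]
struct Meta {
    version: u8,
}

impl Meta {
    fn collection_name(&self) -> &'static str {
        match self.version {
            1 => "collection.anki2",
            2 => "collection.anki21",
            _ => "collection.anki21b",
        }
    }

    fn zstd_compressed(&self) -> bool {
        self.version >= 3
    }

    fn media_list_is_hashmap(&self) -> bool {
        self.version < 3
    }
}

fn main() {
    for version in 1..=3u8 {
        let meta = Meta { version };
        println!(
            "v{}: reads {}, zstd: {}, media map is hashmap: {}",
            version,
            meta.collection_name(),
            meta.zstd_compressed(),
            meta.media_list_is_hashmap()
        );
    }
}
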
@@ -2,6 +2,7 @@
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

pub mod backup;
pub mod exporting;
pub(crate) mod timestamps;
mod transact;
pub(crate) mod undo;