From e759885734eec4cb688b4e2524bd815877e01090 Mon Sep 17 00:00:00 2001 From: RumovZ Date: Tue, 15 Mar 2022 07:48:02 +0100 Subject: [PATCH] Backend colpkg exporting (#1719) * Implement colpkg exporting on backend * Use exporting logic in backup.rs * Refactor exporting.rs * Add backend function to export collection * Refactor backend/collection.rs * Use backend for colpkg exporting * Don't use default zip compression for media * Add exporting progress * Refactor media file writing * Write dummy collections * Localize dummy collection note * Minimize dummy db size * Use `NamedTempFile::new()` instead of `new_in` * Drop redundant v2 dummy collection * COLLECTION_VERSION -> PACKAGE_VERSION * Split `lock_collection()` into two to drop flag * Expose new colpkg in GUI * Improve dummy collection message * Please type checker * importing-colpkg-too-new -> exporting-... * Compress the media map in the v3 package (dae) On collections with lots of media, it can grow into megabytes. Also return an error in extract_media_file_names(), instead of masking it as an optional. * Store media map as a vector in the v3 package (dae) This compresses better (eg 280kb original, 100kb hashmap, 42kb vec) In the colpkg import case we don't need random access. When importing an apkg, we will need to be able to fetch file data for a given media filename, but the existing map doesn't help us there, as we need filename->index, not index->filename. * Ensure folders in the media dir don't break the file mapping (dae) --- ftl/core/exporting.ftl | 1 + proto/anki/collection.proto | 8 + pylib/anki/collection.py | 8 + pylib/anki/exporting.py | 46 +++-- rslib/src/backend/collection.rs | 108 +++++++---- rslib/src/backend/progress.rs | 2 + rslib/src/collection/backup.rs | 128 +++++-------- rslib/src/collection/exporting.rs | 291 ++++++++++++++++++++++++++++++ rslib/src/collection/mod.rs | 1 + 9 files changed, 463 insertions(+), 130 deletions(-) create mode 100644 rslib/src/collection/exporting.rs diff --git a/ftl/core/exporting.ftl b/ftl/core/exporting.ftl index c373b63ed..b396001f3 100644 --- a/ftl/core/exporting.ftl +++ b/ftl/core/exporting.ftl @@ -5,6 +5,7 @@ exporting-anki-deck-package = Anki Deck Package exporting-cards-in-plain-text = Cards in Plain Text exporting-collection = collection exporting-collection-exported = Collection exported. +exporting-colpkg-too-new = Please update to the latest Anki version, then import the .colpkg file again. exporting-couldnt-save-file = Couldn't save file: { $val } exporting-export = Export... 
exporting-export-format = Export format: diff --git a/proto/anki/collection.proto b/proto/anki/collection.proto index a76097dff..7d55b9b60 100644 --- a/proto/anki/collection.proto +++ b/proto/anki/collection.proto @@ -20,6 +20,7 @@ service CollectionService { rpc LatestProgress(generic.Empty) returns (Progress); rpc SetWantsAbort(generic.Empty) returns (generic.Empty); rpc AwaitBackupCompletion(generic.Empty) returns (generic.Empty); + rpc ExportCollection(ExportCollectionRequest) returns (generic.Empty); } message OpenCollectionRequest { @@ -121,5 +122,12 @@ message Progress { NormalSync normal_sync = 5; DatabaseCheck database_check = 6; string importing = 7; + uint32 exporting = 8; } } + +message ExportCollectionRequest { + string out_path = 1; + bool include_media = 2; + bool legacy = 3; +} diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index 84896cb5a..2e7468129 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -264,6 +264,14 @@ class Collection(DeprecatedNamesMixin): self._clear_caches() self.db = None + def export_collection( + self, out_path: str, include_media: bool, legacy: bool + ) -> None: + self.close_for_full_sync() + self._backend.export_collection( + out_path=out_path, include_media=include_media, legacy=legacy + ) + def rollback(self) -> None: self._clear_caches() self.db.rollback() diff --git a/pylib/anki/exporting.py b/pylib/anki/exporting.py index db433c911..1c6398aaa 100644 --- a/pylib/anki/exporting.py +++ b/pylib/anki/exporting.py @@ -9,6 +9,8 @@ import json import os import re import shutil +import threading +import time import unicodedata import zipfile from io import BufferedWriter @@ -419,6 +421,7 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter): ext = ".colpkg" verbatim = True includeSched = None + LEGACY = True def __init__(self, col): AnkiPackageExporter.__init__(self, col) @@ -427,22 +430,32 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter): def key(col: Collection) -> str: return col.tr.exporting_anki_collection_package() - def doExport(self, z, path): - "Export collection. Caller must re-open afterwards." - # close our deck & write it into the zip file - self.count = self.col.card_count() - v2 = self.col.sched_ver() != 1 - mdir = self.col.media.dir() - self.col.close(downgrade=True) - if not v2: - z.write(self.col.path, "collection.anki2") - else: - self._addDummyCollection(z) - z.write(self.col.path, "collection.anki21") - # copy all media - if not self.includeMedia: - return {} - return self._exportMedia(z, os.listdir(mdir), mdir) + def exportInto(self, path: str) -> None: + """Export collection. 
Caller must re-open afterwards.""" + + def exporting_media() -> bool: + return any( + hook.__name__ == "exported_media" + for hook in hooks.media_files_did_export._hooks + ) + + def progress() -> None: + while exporting_media(): + progress = self.col._backend.latest_progress() + if progress.HasField("exporting"): + hooks.media_files_did_export(progress.exporting) + time.sleep(0.1) + + threading.Thread(target=progress).start() + self.col.export_collection(path, self.includeMedia, self.LEGACY) + + +class AnkiCollectionPackage21bExporter(AnkiCollectionPackageExporter): + LEGACY = False + + @staticmethod + def key(_col: Collection) -> str: + return "Anki 2.1.50+ Collection Package" # Export modules @@ -459,6 +472,7 @@ def exporters(col: Collection) -> list[tuple[str, Any]]: exps = [ id(AnkiCollectionPackageExporter), + id(AnkiCollectionPackage21bExporter), id(AnkiPackageExporter), id(TextNoteExporter), id(TextCardExporter), diff --git a/rslib/src/backend/collection.rs b/rslib/src/backend/collection.rs index 9e9672067..80247e704 100644 --- a/rslib/src/backend/collection.rs +++ b/rslib/src/backend/collection.rs @@ -1,7 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -use std::path::Path; +use std::{path::Path, sync::MutexGuard}; use slog::error; @@ -12,6 +12,7 @@ use crate::{ backend_proto::{self as pb, preferences::Backups}, collection::{ backup::{self, ImportProgress}, + exporting::export_collection_file, CollectionBuilder, }, log::{self}, @@ -30,10 +31,7 @@ impl CollectionService for Backend { } fn open_collection(&self, input: pb::OpenCollectionRequest) -> Result { - let mut col = self.col.lock().unwrap(); - if col.is_some() { - return Err(AnkiError::CollectionAlreadyOpen); - } + let mut guard = self.lock_closed_collection()?; let mut builder = CollectionBuilder::new(input.collection_path); builder @@ -46,7 +44,7 @@ impl CollectionService for Backend { builder.set_logger(self.log.clone()); } - *col = Some(builder.build()?); + *guard = Some(builder.build()?); Ok(().into()) } @@ -54,12 +52,9 @@ impl CollectionService for Backend { fn close_collection(&self, input: pb::CloseCollectionRequest) -> Result { self.abort_media_sync_and_wait(); - let mut col = self.col.lock().unwrap(); - if col.is_none() { - return Err(AnkiError::CollectionNotOpen); - } + let mut guard = self.lock_open_collection()?; - let mut col_inner = col.take().unwrap(); + let mut col_inner = guard.take().unwrap(); let limits = col_inner.get_backups(); let col_path = std::mem::take(&mut col_inner.col_path); @@ -82,30 +77,39 @@ impl CollectionService for Backend { Ok(().into()) } - fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result { - let col = self.col.lock().unwrap(); - if col.is_some() { - Err(AnkiError::CollectionAlreadyOpen) - } else { - let mut handler = self.new_progress_handler(); - let progress_fn = move |progress| { - let throttle = matches!(progress, ImportProgress::Media(_)); - if handler.update(Progress::Import(progress), throttle) { - Ok(()) - } else { - Err(AnkiError::Interrupted) - } - }; + fn export_collection(&self, input: pb::ExportCollectionRequest) -> Result { + self.abort_media_sync_and_wait(); - backup::restore_backup( - progress_fn, - &input.col_path, - &input.backup_path, - &input.media_folder, - &self.tr, - ) - .map(Into::into) - } + let mut guard = self.lock_open_collection()?; + + let col_inner = guard.take().unwrap(); + let col_path = col_inner.col_path.clone(); + let media_dir = 
input.include_media.then(|| col_inner.media_folder.clone()); + + col_inner.close(true)?; + + export_collection_file( + input.out_path, + col_path, + media_dir, + input.legacy, + &self.tr, + self.export_progress_fn(), + ) + .map(Into::into) + } + + fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result { + let _guard = self.lock_closed_collection()?; + + backup::restore_backup( + self.import_progress_fn(), + &input.col_path, + &input.backup_path, + &input.media_folder, + &self.tr, + ) + .map(Into::into) } fn check_database(&self, _input: pb::Empty) -> Result { @@ -150,6 +154,22 @@ impl CollectionService for Backend { } impl Backend { + fn lock_open_collection(&self) -> Result>> { + let guard = self.col.lock().unwrap(); + guard + .is_some() + .then(|| guard) + .ok_or(AnkiError::CollectionNotOpen) + } + + fn lock_closed_collection(&self) -> Result>> { + let guard = self.col.lock().unwrap(); + guard + .is_none() + .then(|| guard) + .ok_or(AnkiError::CollectionAlreadyOpen) + } + fn await_backup_completion(&self) { if let Some(task) = self.backup_task.lock().unwrap().take() { task.join().unwrap(); @@ -170,8 +190,28 @@ impl Backend { limits, minimum_backup_interval, self.log.clone(), + self.tr.clone(), )?; Ok(()) } + + fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> { + let mut handler = self.new_progress_handler(); + move |progress| { + let throttle = matches!(progress, ImportProgress::Media(_)); + if handler.update(Progress::Import(progress), throttle) { + Ok(()) + } else { + Err(AnkiError::Interrupted) + } + } + } + + fn export_progress_fn(&self) -> impl FnMut(usize) { + let mut handler = self.new_progress_handler(); + move |media_files| { + handler.update(Progress::Export(media_files), true); + } + } } diff --git a/rslib/src/backend/progress.rs b/rslib/src/backend/progress.rs index fd10f7d59..ea88c1c29 100644 --- a/rslib/src/backend/progress.rs +++ b/rslib/src/backend/progress.rs @@ -52,6 +52,7 @@ pub(super) enum Progress { NormalSync(NormalSyncProgress), DatabaseCheck(DatabaseCheckProgress), Import(ImportProgress), + Export(usize), } pub(super) fn progress_to_proto(progress: Option, tr: &I18n) -> pb::Progress { @@ -112,6 +113,7 @@ pub(super) fn progress_to_proto(progress: Option, tr: &I18n) -> pb::Pr } .into(), ), + Progress::Export(progress) => pb::progress::Value::Exporting(progress as u32), } } else { pb::progress::Value::None(pb::Empty {}) diff --git a/rslib/src/collection/backup.rs b/rslib/src/collection/backup.rs index b1db8a3fe..fb46f7701 100644 --- a/rslib/src/collection/backup.rs +++ b/rslib/src/collection/backup.rs @@ -5,7 +5,7 @@ use std::{ collections::HashMap, ffi::OsStr, fs::{self, read_dir, remove_file, DirEntry, File}, - io::{self, Read, Write}, + io::{self, Write}, path::{Path, PathBuf}, thread::{self, JoinHandle}, time::SystemTime, @@ -14,32 +14,25 @@ use std::{ use chrono::prelude::*; use itertools::Itertools; use log::error; -use serde_derive::{Deserialize, Serialize}; use tempfile::NamedTempFile; -use zip::{write::FileOptions, CompressionMethod, ZipArchive, ZipWriter}; -use zstd::{self, stream::copy_decode, Encoder}; +use zip::ZipArchive; +use zstd::{self, stream::copy_decode}; use crate::{ - backend_proto::preferences::Backups, collection::CollectionBuilder, error::ImportError, log, - prelude::*, text::normalize_to_nfc, + backend_proto::preferences::Backups, + collection::{ + exporting::{export_collection_data, Meta, PACKAGE_VERSION}, + CollectionBuilder, + }, + error::ImportError, + log, + prelude::*, + text::normalize_to_nfc, }; 
-/// Bump if making changes that break restoring on older releases. -const BACKUP_VERSION: u8 = 3; const BACKUP_FORMAT_STRING: &str = "backup-%Y-%m-%d-%H.%M.%S.colpkg"; /// Default seconds after a backup, in which further backups will be skipped. const MINIMUM_BACKUP_INTERVAL: u64 = 5 * 60; -/// Enable multithreaded compression if over this size. For smaller files, -/// multithreading makes things slower, and in initial tests, the crossover -/// point was somewhere between 1MB and 10MB on a many-core system. -const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024; - -#[derive(Debug, Default, Serialize, Deserialize)] -#[serde(default)] -struct Meta { - #[serde(rename = "ver")] - version: u8, -} #[derive(Debug, Clone, Copy, PartialEq)] pub enum ImportProgress { @@ -53,6 +46,7 @@ pub fn backup( limits: Backups, minimum_backup_interval: Option, log: Logger, + tr: I18n, ) -> Result>> { let recent_secs = minimum_backup_interval.unwrap_or(MINIMUM_BACKUP_INTERVAL); if recent_secs > 0 && has_recent_backup(backup_folder.as_ref(), recent_secs)? { @@ -60,7 +54,7 @@ pub fn backup( } else { let col_data = std::fs::read(col_path)?; Ok(Some(thread::spawn(move || { - backup_inner(&col_data, &backup_folder, limits, log) + backup_inner(&col_data, &backup_folder, limits, log, &tr) }))) } } @@ -99,7 +93,7 @@ pub fn restore_backup( progress_fn(ImportProgress::Collection)?; let mut result = String::new(); - if let Err(e) = restore_media(progress_fn, &mut archive, media_folder) { + if let Err(e) = restore_media(meta, progress_fn, &mut archive, media_folder) { result = tr .importing_failed_to_import_media_file(e.localized_description(tr)) .into_owned() @@ -114,8 +108,14 @@ pub fn restore_backup( Ok(result) } -fn backup_inner>(col_data: &[u8], backup_folder: P, limits: Backups, log: Logger) { - if let Err(error) = write_backup(col_data, backup_folder.as_ref()) { +fn backup_inner>( + col_data: &[u8], + backup_folder: P, + limits: Backups, + log: Logger, + tr: &I18n, +) { + if let Err(error) = write_backup(col_data, backup_folder.as_ref(), tr) { error!(log, "failed to backup collection: {error:?}"); } if let Err(error) = thin_backups(backup_folder, limits, &log) { @@ -123,36 +123,10 @@ fn backup_inner>(col_data: &[u8], backup_folder: P, limits: Backu } } -fn write_backup>(mut col_data: &[u8], backup_folder: S) -> Result<()> { - let out_file = File::create(out_path(backup_folder))?; - let mut zip = ZipWriter::new(out_file); - let options = FileOptions::default().compression_method(CompressionMethod::Stored); - let meta = serde_json::to_string(&Meta { - version: BACKUP_VERSION, - }) - .unwrap(); - - zip.start_file("meta", options)?; - zip.write_all(meta.as_bytes())?; - zip.start_file("collection.anki21b", options)?; - let col_data_len = col_data.len(); - zstd_copy(&mut col_data, &mut zip, col_data_len)?; - zip.start_file("media", options)?; - zip.write_all(b"{}")?; - zip.finish()?; - - Ok(()) -} - -/// Copy contents of reader into writer, compressing as we copy. 
-fn zstd_copy(reader: &mut R, writer: &mut W, size: usize) -> Result<()> { - let mut encoder = Encoder::new(writer, 0)?; - if size > MULTITHREAD_MIN_BYTES { - encoder.multithread(num_cpus::get() as u32)?; - } - io::copy(reader, &mut encoder)?; - encoder.finish()?; - Ok(()) +fn write_backup>(col_data: &[u8], backup_folder: S, tr: &I18n) -> Result<()> { + let out_path = + Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING))); + export_collection_data(&out_path, col_data, tr) } fn thin_backups>(backup_folder: P, limits: Backups, log: &Logger) -> Result<()> { @@ -168,10 +142,6 @@ fn thin_backups>(backup_folder: P, limits: Backups, log: &Logger) Ok(()) } -fn out_path>(backup_folder: S) -> PathBuf { - Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING))) -} - fn datetime_from_file_name(file_name: &str) -> Option> { NaiveDateTime::parse_from_str(file_name, BACKUP_FORMAT_STRING) .ok() @@ -319,7 +289,7 @@ impl Meta { .ok() .and_then(|file| serde_json::from_reader(file).ok()) .unwrap_or_default(); - if meta.version > BACKUP_VERSION { + if meta.version > PACKAGE_VERSION { return Err(AnkiError::ImportError(ImportError::TooNew)); } else if meta.version == 0 { meta.version = if archive.by_name("collection.anki21").is_ok() { @@ -331,14 +301,6 @@ impl Meta { Ok(meta) } - - fn collection_name(&self) -> &'static str { - match self.version { - 1 => "collection.anki2", - 2 => "collection.anki21", - _ => "collection.anki21b", - } - } } fn check_collection(col_path: &Path) -> Result<()> { @@ -356,21 +318,22 @@ fn check_collection(col_path: &Path) -> Result<()> { } fn restore_media( + meta: Meta, mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, archive: &mut ZipArchive, media_folder: &str, ) -> Result<()> { - let media_file_names = extract_media_file_names(archive).ok_or(AnkiError::NotFound)?; + let media_file_names = extract_media_file_names(meta, archive)?; let mut count = 0; - for (archive_file_name, file_name) in media_file_names { + for (archive_file_name, file_name) in media_file_names.iter().enumerate() { count += 1; if count % 10 == 0 { progress_fn(ImportProgress::Media(count))?; } - if let Ok(mut zip_file) = archive.by_name(&archive_file_name) { - let file_path = Path::new(&media_folder).join(normalize_to_nfc(&file_name).as_ref()); + if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) { + let file_path = Path::new(&media_folder).join(normalize_to_nfc(file_name).as_ref()); let files_are_equal = fs::metadata(&file_path) .map(|metadata| metadata.len() == zip_file.size()) .unwrap_or_default(); @@ -392,15 +355,20 @@ fn restore_media( Ok(()) } -fn extract_media_file_names(archive: &mut ZipArchive) -> Option> { - archive - .by_name("media") - .ok() - .and_then(|mut file| { - let mut buf = Vec::new(); - file.read_to_end(&mut buf).ok().map(|_| buf) - }) - .and_then(|bytes| serde_json::from_slice(&bytes).ok()) +fn extract_media_file_names(meta: Meta, archive: &mut ZipArchive) -> Result> { + let mut file = archive.by_name("media")?; + let mut buf = Vec::new(); + if meta.zstd_compressed() { + copy_decode(file, &mut buf)?; + } else { + io::copy(&mut file, &mut buf)?; + } + if meta.media_list_is_hashmap() { + let map: HashMap<&str, String> = serde_json::from_slice(&buf)?; + Ok(map.into_iter().map(|(_k, v)| v).collect()) + } else { + serde_json::from_slice(&buf).map_err(Into::into) + } } fn copy_collection( @@ -411,7 +379,7 @@ fn copy_collection( let mut file = archive .by_name(meta.collection_name()) .map_err(|_| 
AnkiError::ImportError(ImportError::Corrupt))?; - if meta.version < 3 { + if !meta.zstd_compressed() { io::copy(&mut file, writer)?; } else { copy_decode(file, writer)?; diff --git a/rslib/src/collection/exporting.rs b/rslib/src/collection/exporting.rs new file mode 100644 index 000000000..9b1a6a133 --- /dev/null +++ b/rslib/src/collection/exporting.rs @@ -0,0 +1,291 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::{ + collections::HashMap, + fs::{read_dir, DirEntry, File}, + io::{self, Read, Write}, + path::{Path, PathBuf}, +}; + +use serde_derive::{Deserialize, Serialize}; +use tempfile::NamedTempFile; +use zip::{write::FileOptions, CompressionMethod, ZipWriter}; +use zstd::{ + stream::{raw::Encoder as RawEncoder, zio::Writer}, + Encoder, +}; + +use crate::{collection::CollectionBuilder, prelude::*, text::normalize_to_nfc}; + +/// Bump if making changes that break restoring on older releases. +pub const PACKAGE_VERSION: u8 = 3; +const COLLECTION_NAME: &str = "collection.anki21b"; +const COLLECTION_NAME_V1: &str = "collection.anki2"; +const COLLECTION_NAME_V2: &str = "collection.anki21"; +/// Enable multithreaded compression if over this size. For smaller files, +/// multithreading makes things slower, and in initial tests, the crossover +/// point was somewhere between 1MB and 10MB on a many-core system. +const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024; + +#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)] +#[serde(default)] +pub(super) struct Meta { + #[serde(rename = "ver")] + pub(super) version: u8, +} + +impl Meta { + pub(super) fn new() -> Self { + Self { + version: PACKAGE_VERSION, + } + } + + pub(super) fn new_v2() -> Self { + Self { version: 2 } + } + + pub(super) fn collection_name(&self) -> &'static str { + match self.version { + 1 => COLLECTION_NAME_V1, + 2 => COLLECTION_NAME_V2, + _ => COLLECTION_NAME, + } + } + + pub(super) fn zstd_compressed(&self) -> bool { + self.version >= 3 + } + + pub(super) fn media_list_is_hashmap(&self) -> bool { + self.version < 3 + } +} + +pub fn export_collection_file( + out_path: impl AsRef, + col_path: impl AsRef, + media_dir: Option, + legacy: bool, + tr: &I18n, + progress_fn: impl FnMut(usize), +) -> Result<()> { + let meta = if legacy { Meta::new_v2() } else { Meta::new() }; + let mut col_file = File::open(col_path)?; + let col_size = col_file.metadata()?.len() as usize; + export_collection( + meta, + out_path, + &mut col_file, + col_size, + media_dir, + tr, + progress_fn, + ) +} + +pub(crate) fn export_collection_data( + out_path: impl AsRef, + mut col_data: &[u8], + tr: &I18n, +) -> Result<()> { + let col_size = col_data.len(); + export_collection( + Meta::new(), + out_path, + &mut col_data, + col_size, + None, + tr, + |_| (), + ) +} + +fn export_collection( + meta: Meta, + out_path: impl AsRef, + col: &mut impl Read, + col_size: usize, + media_dir: Option, + tr: &I18n, + progress_fn: impl FnMut(usize), +) -> Result<()> { + let out_file = File::create(&out_path)?; + let mut zip = ZipWriter::new(out_file); + + zip.start_file("meta", file_options_stored())?; + zip.write_all(serde_json::to_string(&meta).unwrap().as_bytes())?; + write_collection(meta, &mut zip, col, col_size)?; + write_dummy_collection(&mut zip, tr)?; + write_media(meta, &mut zip, media_dir, progress_fn)?; + zip.finish()?; + + Ok(()) +} + +fn file_options_stored() -> FileOptions { + FileOptions::default().compression_method(CompressionMethod::Stored) +} + +fn 
write_collection( + meta: Meta, + zip: &mut ZipWriter, + col: &mut impl Read, + size: usize, +) -> Result<()> { + if meta.zstd_compressed() { + zip.start_file(meta.collection_name(), file_options_stored())?; + zstd_copy(col, zip, size)?; + } else { + zip.start_file(meta.collection_name(), FileOptions::default())?; + io::copy(col, zip)?; + } + Ok(()) +} + +fn write_dummy_collection(zip: &mut ZipWriter, tr: &I18n) -> Result<()> { + let mut tempfile = create_dummy_collection_file(tr)?; + zip.start_file(COLLECTION_NAME_V1, file_options_stored())?; + io::copy(&mut tempfile, zip)?; + + Ok(()) +} + +fn create_dummy_collection_file(tr: &I18n) -> Result { + let tempfile = NamedTempFile::new()?; + let mut dummy_col = CollectionBuilder::new(tempfile.path()).build()?; + dummy_col.add_dummy_note(tr)?; + dummy_col + .storage + .db + .execute_batch("pragma page_size=512; pragma journal_mode=delete; vacuum;")?; + dummy_col.close(true)?; + + Ok(tempfile) +} + +impl Collection { + fn add_dummy_note(&mut self, tr: &I18n) -> Result<()> { + let notetype = self.get_notetype_by_name("basic")?.unwrap(); + let mut note = notetype.new_note(); + note.set_field(0, tr.exporting_colpkg_too_new())?; + self.add_note(&mut note, DeckId(1))?; + Ok(()) + } +} + +/// Copy contents of reader into writer, compressing as we copy. +fn zstd_copy(reader: &mut impl Read, writer: &mut impl Write, size: usize) -> Result<()> { + let mut encoder = Encoder::new(writer, 0)?; + if size > MULTITHREAD_MIN_BYTES { + encoder.multithread(num_cpus::get() as u32)?; + } + io::copy(reader, &mut encoder)?; + encoder.finish()?; + Ok(()) +} + +fn write_media( + meta: Meta, + zip: &mut ZipWriter, + media_dir: Option, + progress_fn: impl FnMut(usize), +) -> Result<()> { + let mut media_names = vec![]; + + if let Some(media_dir) = media_dir { + write_media_files(meta, zip, &media_dir, &mut media_names, progress_fn)?; + } + + write_media_map(meta, &media_names, zip)?; + + Ok(()) +} + +fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter) -> Result<()> { + zip.start_file("media", file_options_stored())?; + let json_bytes = if meta.media_list_is_hashmap() { + let map: HashMap = media_names + .iter() + .enumerate() + .map(|(k, v)| (k.to_string(), v.as_str())) + .collect(); + serde_json::to_vec(&map)? + } else { + serde_json::to_vec(media_names)? + }; + let size = json_bytes.len(); + let mut cursor = std::io::Cursor::new(json_bytes); + if meta.zstd_compressed() { + zstd_copy(&mut cursor, zip, size)?; + } else { + io::copy(&mut cursor, zip)?; + } + Ok(()) +} + +fn write_media_files( + meta: Meta, + zip: &mut ZipWriter, + dir: &Path, + names: &mut Vec, + mut progress_fn: impl FnMut(usize), +) -> Result<()> { + let mut writer = MediaFileWriter::new(meta); + let mut index = 0; + for entry in read_dir(dir)? { + let entry = entry?; + if !entry.metadata()?.is_file() { + continue; + } + progress_fn(index); + names.push(normalized_unicode_file_name(&entry)?); + zip.start_file(index.to_string(), file_options_stored())?; + writer = writer.write(&mut File::open(entry.path())?, zip)?; + // can't enumerate(), as we skip folders + index += 1; + } + + Ok(()) +} + +fn normalized_unicode_file_name(entry: &DirEntry) -> Result { + entry + .file_name() + .to_str() + .map(|name| normalize_to_nfc(name).into()) + .ok_or_else(|| { + AnkiError::IoError(format!( + "non-unicode file name: {}", + entry.file_name().to_string_lossy() + )) + }) +} + +/// Writes media files while compressing according to the targeted version. 
+/// If compressing, the encoder is reused to optimize for repeated calls. +struct MediaFileWriter(Option>); + +impl MediaFileWriter { + fn new(meta: Meta) -> Self { + Self( + meta.zstd_compressed() + .then(|| RawEncoder::with_dictionary(0, &[]).unwrap()), + ) + } + + fn write(mut self, reader: &mut impl Read, writer: &mut impl Write) -> Result { + // take [self] by value to prevent it from being reused after an error + if let Some(encoder) = self.0.take() { + let mut encoder_writer = Writer::new(writer, encoder); + io::copy(reader, &mut encoder_writer)?; + encoder_writer.finish()?; + self.0 = Some(encoder_writer.into_inner().1); + } else { + io::copy(reader, writer)?; + } + + Ok(self) + } +} diff --git a/rslib/src/collection/mod.rs b/rslib/src/collection/mod.rs index b5d8fa4f2..27e3aabb5 100644 --- a/rslib/src/collection/mod.rs +++ b/rslib/src/collection/mod.rs @@ -2,6 +2,7 @@ // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html pub mod backup; +pub mod exporting; pub(crate) mod timestamps; mod transact; pub(crate) mod undo;
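Usage note (editor's addition, not part of the patch): the diff above exposes the new backend exporter to Python through `Collection.export_collection(out_path, include_media, legacy)`. Below is a minimal usage sketch under stated assumptions; the collection and output paths are hypothetical, and the re-open step follows the exporter docstring's requirement that the caller re-open the collection afterwards.

    from anki.collection import Collection

    # Hypothetical path to an existing collection.
    col = Collection("/path/to/collection.anki2")

    # Write a v3 .colpkg package including media. Passing legacy=True would
    # instead produce the older package layout readable by Anki versions
    # before 2.1.50 (collection.anki21 plus a dummy collection.anki2).
    col.export_collection(
        out_path="/path/to/export.colpkg",  # hypothetical output path
        include_media=True,
        legacy=False,
    )

    # export_collection() closes the collection (close_for_full_sync) before
    # handing off to the backend, so it must be re-opened before further use.
    # pylib's Collection.reopen() is assumed here; constructing a new
    # Collection on the same path would work as well.
    col.reopen()

While the export runs, the backend reports the number of media files written so far via the new `exporting` field of `latest_progress()`; this is what `AnkiCollectionPackageExporter.exportInto()` polls on a background thread to drive the `media_files_did_export` hook in the GUI.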