diff --git a/ftl/core/exporting.ftl b/ftl/core/exporting.ftl index c373b63ed..b396001f3 100644 --- a/ftl/core/exporting.ftl +++ b/ftl/core/exporting.ftl @@ -5,6 +5,7 @@ exporting-anki-deck-package = Anki Deck Package exporting-cards-in-plain-text = Cards in Plain Text exporting-collection = collection exporting-collection-exported = Collection exported. +exporting-colpkg-too-new = Please update to the latest Anki version, then import the .colpkg file again. exporting-couldnt-save-file = Couldn't save file: { $val } exporting-export = Export... exporting-export-format = Export format: diff --git a/proto/anki/collection.proto b/proto/anki/collection.proto index a76097dff..7d55b9b60 100644 --- a/proto/anki/collection.proto +++ b/proto/anki/collection.proto @@ -20,6 +20,7 @@ service CollectionService { rpc LatestProgress(generic.Empty) returns (Progress); rpc SetWantsAbort(generic.Empty) returns (generic.Empty); rpc AwaitBackupCompletion(generic.Empty) returns (generic.Empty); + rpc ExportCollection(ExportCollectionRequest) returns (generic.Empty); } message OpenCollectionRequest { @@ -121,5 +122,12 @@ message Progress { NormalSync normal_sync = 5; DatabaseCheck database_check = 6; string importing = 7; + uint32 exporting = 8; } } + +message ExportCollectionRequest { + string out_path = 1; + bool include_media = 2; + bool legacy = 3; +} diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index 84896cb5a..2e7468129 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -264,6 +264,14 @@ class Collection(DeprecatedNamesMixin): self._clear_caches() self.db = None + def export_collection( + self, out_path: str, include_media: bool, legacy: bool + ) -> None: + self.close_for_full_sync() + self._backend.export_collection( + out_path=out_path, include_media=include_media, legacy=legacy + ) + def rollback(self) -> None: self._clear_caches() self.db.rollback() diff --git a/pylib/anki/exporting.py b/pylib/anki/exporting.py index db433c911..1c6398aaa 100644 --- a/pylib/anki/exporting.py +++ b/pylib/anki/exporting.py @@ -9,6 +9,8 @@ import json import os import re import shutil +import threading +import time import unicodedata import zipfile from io import BufferedWriter @@ -419,6 +421,7 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter): ext = ".colpkg" verbatim = True includeSched = None + LEGACY = True def __init__(self, col): AnkiPackageExporter.__init__(self, col) @@ -427,22 +430,32 @@ class AnkiCollectionPackageExporter(AnkiPackageExporter): def key(col: Collection) -> str: return col.tr.exporting_anki_collection_package() - def doExport(self, z, path): - "Export collection. Caller must re-open afterwards." - # close our deck & write it into the zip file - self.count = self.col.card_count() - v2 = self.col.sched_ver() != 1 - mdir = self.col.media.dir() - self.col.close(downgrade=True) - if not v2: - z.write(self.col.path, "collection.anki2") - else: - self._addDummyCollection(z) - z.write(self.col.path, "collection.anki21") - # copy all media - if not self.includeMedia: - return {} - return self._exportMedia(z, os.listdir(mdir), mdir) + def exportInto(self, path: str) -> None: + """Export collection. 
Caller must re-open afterwards.""" + + def exporting_media() -> bool: + return any( + hook.__name__ == "exported_media" + for hook in hooks.media_files_did_export._hooks + ) + + def progress() -> None: + while exporting_media(): + progress = self.col._backend.latest_progress() + if progress.HasField("exporting"): + hooks.media_files_did_export(progress.exporting) + time.sleep(0.1) + + threading.Thread(target=progress).start() + self.col.export_collection(path, self.includeMedia, self.LEGACY) + + +class AnkiCollectionPackage21bExporter(AnkiCollectionPackageExporter): + LEGACY = False + + @staticmethod + def key(_col: Collection) -> str: + return "Anki 2.1.50+ Collection Package" # Export modules @@ -459,6 +472,7 @@ def exporters(col: Collection) -> list[tuple[str, Any]]: exps = [ id(AnkiCollectionPackageExporter), + id(AnkiCollectionPackage21bExporter), id(AnkiPackageExporter), id(TextNoteExporter), id(TextCardExporter), diff --git a/rslib/src/backend/collection.rs b/rslib/src/backend/collection.rs index 9e9672067..80247e704 100644 --- a/rslib/src/backend/collection.rs +++ b/rslib/src/backend/collection.rs @@ -1,7 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -use std::path::Path; +use std::{path::Path, sync::MutexGuard}; use slog::error; @@ -12,6 +12,7 @@ use crate::{ backend_proto::{self as pb, preferences::Backups}, collection::{ backup::{self, ImportProgress}, + exporting::export_collection_file, CollectionBuilder, }, log::{self}, @@ -30,10 +31,7 @@ impl CollectionService for Backend { } fn open_collection(&self, input: pb::OpenCollectionRequest) -> Result { - let mut col = self.col.lock().unwrap(); - if col.is_some() { - return Err(AnkiError::CollectionAlreadyOpen); - } + let mut guard = self.lock_closed_collection()?; let mut builder = CollectionBuilder::new(input.collection_path); builder @@ -46,7 +44,7 @@ impl CollectionService for Backend { builder.set_logger(self.log.clone()); } - *col = Some(builder.build()?); + *guard = Some(builder.build()?); Ok(().into()) } @@ -54,12 +52,9 @@ impl CollectionService for Backend { fn close_collection(&self, input: pb::CloseCollectionRequest) -> Result { self.abort_media_sync_and_wait(); - let mut col = self.col.lock().unwrap(); - if col.is_none() { - return Err(AnkiError::CollectionNotOpen); - } + let mut guard = self.lock_open_collection()?; - let mut col_inner = col.take().unwrap(); + let mut col_inner = guard.take().unwrap(); let limits = col_inner.get_backups(); let col_path = std::mem::take(&mut col_inner.col_path); @@ -82,30 +77,39 @@ impl CollectionService for Backend { Ok(().into()) } - fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result { - let col = self.col.lock().unwrap(); - if col.is_some() { - Err(AnkiError::CollectionAlreadyOpen) - } else { - let mut handler = self.new_progress_handler(); - let progress_fn = move |progress| { - let throttle = matches!(progress, ImportProgress::Media(_)); - if handler.update(Progress::Import(progress), throttle) { - Ok(()) - } else { - Err(AnkiError::Interrupted) - } - }; + fn export_collection(&self, input: pb::ExportCollectionRequest) -> Result { + self.abort_media_sync_and_wait(); - backup::restore_backup( - progress_fn, - &input.col_path, - &input.backup_path, - &input.media_folder, - &self.tr, - ) - .map(Into::into) - } + let mut guard = self.lock_open_collection()?; + + let col_inner = guard.take().unwrap(); + let col_path = col_inner.col_path.clone(); + let media_dir = 
input.include_media.then(|| col_inner.media_folder.clone()); + + col_inner.close(true)?; + + export_collection_file( + input.out_path, + col_path, + media_dir, + input.legacy, + &self.tr, + self.export_progress_fn(), + ) + .map(Into::into) + } + + fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result { + let _guard = self.lock_closed_collection()?; + + backup::restore_backup( + self.import_progress_fn(), + &input.col_path, + &input.backup_path, + &input.media_folder, + &self.tr, + ) + .map(Into::into) } fn check_database(&self, _input: pb::Empty) -> Result { @@ -150,6 +154,22 @@ impl CollectionService for Backend { } impl Backend { + fn lock_open_collection(&self) -> Result>> { + let guard = self.col.lock().unwrap(); + guard + .is_some() + .then(|| guard) + .ok_or(AnkiError::CollectionNotOpen) + } + + fn lock_closed_collection(&self) -> Result>> { + let guard = self.col.lock().unwrap(); + guard + .is_none() + .then(|| guard) + .ok_or(AnkiError::CollectionAlreadyOpen) + } + fn await_backup_completion(&self) { if let Some(task) = self.backup_task.lock().unwrap().take() { task.join().unwrap(); @@ -170,8 +190,28 @@ impl Backend { limits, minimum_backup_interval, self.log.clone(), + self.tr.clone(), )?; Ok(()) } + + fn import_progress_fn(&self) -> impl FnMut(ImportProgress) -> Result<()> { + let mut handler = self.new_progress_handler(); + move |progress| { + let throttle = matches!(progress, ImportProgress::Media(_)); + if handler.update(Progress::Import(progress), throttle) { + Ok(()) + } else { + Err(AnkiError::Interrupted) + } + } + } + + fn export_progress_fn(&self) -> impl FnMut(usize) { + let mut handler = self.new_progress_handler(); + move |media_files| { + handler.update(Progress::Export(media_files), true); + } + } } diff --git a/rslib/src/backend/progress.rs b/rslib/src/backend/progress.rs index fd10f7d59..ea88c1c29 100644 --- a/rslib/src/backend/progress.rs +++ b/rslib/src/backend/progress.rs @@ -52,6 +52,7 @@ pub(super) enum Progress { NormalSync(NormalSyncProgress), DatabaseCheck(DatabaseCheckProgress), Import(ImportProgress), + Export(usize), } pub(super) fn progress_to_proto(progress: Option, tr: &I18n) -> pb::Progress { @@ -112,6 +113,7 @@ pub(super) fn progress_to_proto(progress: Option, tr: &I18n) -> pb::Pr } .into(), ), + Progress::Export(progress) => pb::progress::Value::Exporting(progress as u32), } } else { pb::progress::Value::None(pb::Empty {}) diff --git a/rslib/src/collection/backup.rs b/rslib/src/collection/backup.rs index b1db8a3fe..fb46f7701 100644 --- a/rslib/src/collection/backup.rs +++ b/rslib/src/collection/backup.rs @@ -5,7 +5,7 @@ use std::{ collections::HashMap, ffi::OsStr, fs::{self, read_dir, remove_file, DirEntry, File}, - io::{self, Read, Write}, + io::{self, Write}, path::{Path, PathBuf}, thread::{self, JoinHandle}, time::SystemTime, @@ -14,32 +14,25 @@ use std::{ use chrono::prelude::*; use itertools::Itertools; use log::error; -use serde_derive::{Deserialize, Serialize}; use tempfile::NamedTempFile; -use zip::{write::FileOptions, CompressionMethod, ZipArchive, ZipWriter}; -use zstd::{self, stream::copy_decode, Encoder}; +use zip::ZipArchive; +use zstd::{self, stream::copy_decode}; use crate::{ - backend_proto::preferences::Backups, collection::CollectionBuilder, error::ImportError, log, - prelude::*, text::normalize_to_nfc, + backend_proto::preferences::Backups, + collection::{ + exporting::{export_collection_data, Meta, PACKAGE_VERSION}, + CollectionBuilder, + }, + error::ImportError, + log, + prelude::*, + text::normalize_to_nfc, }; 
-/// Bump if making changes that break restoring on older releases. -const BACKUP_VERSION: u8 = 3; const BACKUP_FORMAT_STRING: &str = "backup-%Y-%m-%d-%H.%M.%S.colpkg"; /// Default seconds after a backup, in which further backups will be skipped. const MINIMUM_BACKUP_INTERVAL: u64 = 5 * 60; -/// Enable multithreaded compression if over this size. For smaller files, -/// multithreading makes things slower, and in initial tests, the crossover -/// point was somewhere between 1MB and 10MB on a many-core system. -const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024; - -#[derive(Debug, Default, Serialize, Deserialize)] -#[serde(default)] -struct Meta { - #[serde(rename = "ver")] - version: u8, -} #[derive(Debug, Clone, Copy, PartialEq)] pub enum ImportProgress { @@ -53,6 +46,7 @@ pub fn backup( limits: Backups, minimum_backup_interval: Option, log: Logger, + tr: I18n, ) -> Result>> { let recent_secs = minimum_backup_interval.unwrap_or(MINIMUM_BACKUP_INTERVAL); if recent_secs > 0 && has_recent_backup(backup_folder.as_ref(), recent_secs)? { @@ -60,7 +54,7 @@ pub fn backup( } else { let col_data = std::fs::read(col_path)?; Ok(Some(thread::spawn(move || { - backup_inner(&col_data, &backup_folder, limits, log) + backup_inner(&col_data, &backup_folder, limits, log, &tr) }))) } } @@ -99,7 +93,7 @@ pub fn restore_backup( progress_fn(ImportProgress::Collection)?; let mut result = String::new(); - if let Err(e) = restore_media(progress_fn, &mut archive, media_folder) { + if let Err(e) = restore_media(meta, progress_fn, &mut archive, media_folder) { result = tr .importing_failed_to_import_media_file(e.localized_description(tr)) .into_owned() @@ -114,8 +108,14 @@ pub fn restore_backup( Ok(result) } -fn backup_inner>(col_data: &[u8], backup_folder: P, limits: Backups, log: Logger) { - if let Err(error) = write_backup(col_data, backup_folder.as_ref()) { +fn backup_inner>( + col_data: &[u8], + backup_folder: P, + limits: Backups, + log: Logger, + tr: &I18n, +) { + if let Err(error) = write_backup(col_data, backup_folder.as_ref(), tr) { error!(log, "failed to backup collection: {error:?}"); } if let Err(error) = thin_backups(backup_folder, limits, &log) { @@ -123,36 +123,10 @@ fn backup_inner>(col_data: &[u8], backup_folder: P, limits: Backu } } -fn write_backup>(mut col_data: &[u8], backup_folder: S) -> Result<()> { - let out_file = File::create(out_path(backup_folder))?; - let mut zip = ZipWriter::new(out_file); - let options = FileOptions::default().compression_method(CompressionMethod::Stored); - let meta = serde_json::to_string(&Meta { - version: BACKUP_VERSION, - }) - .unwrap(); - - zip.start_file("meta", options)?; - zip.write_all(meta.as_bytes())?; - zip.start_file("collection.anki21b", options)?; - let col_data_len = col_data.len(); - zstd_copy(&mut col_data, &mut zip, col_data_len)?; - zip.start_file("media", options)?; - zip.write_all(b"{}")?; - zip.finish()?; - - Ok(()) -} - -/// Copy contents of reader into writer, compressing as we copy. 
-fn zstd_copy(reader: &mut R, writer: &mut W, size: usize) -> Result<()> { - let mut encoder = Encoder::new(writer, 0)?; - if size > MULTITHREAD_MIN_BYTES { - encoder.multithread(num_cpus::get() as u32)?; - } - io::copy(reader, &mut encoder)?; - encoder.finish()?; - Ok(()) +fn write_backup>(col_data: &[u8], backup_folder: S, tr: &I18n) -> Result<()> { + let out_path = + Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING))); + export_collection_data(&out_path, col_data, tr) } fn thin_backups>(backup_folder: P, limits: Backups, log: &Logger) -> Result<()> { @@ -168,10 +142,6 @@ fn thin_backups>(backup_folder: P, limits: Backups, log: &Logger) Ok(()) } -fn out_path>(backup_folder: S) -> PathBuf { - Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING))) -} - fn datetime_from_file_name(file_name: &str) -> Option> { NaiveDateTime::parse_from_str(file_name, BACKUP_FORMAT_STRING) .ok() @@ -319,7 +289,7 @@ impl Meta { .ok() .and_then(|file| serde_json::from_reader(file).ok()) .unwrap_or_default(); - if meta.version > BACKUP_VERSION { + if meta.version > PACKAGE_VERSION { return Err(AnkiError::ImportError(ImportError::TooNew)); } else if meta.version == 0 { meta.version = if archive.by_name("collection.anki21").is_ok() { @@ -331,14 +301,6 @@ impl Meta { Ok(meta) } - - fn collection_name(&self) -> &'static str { - match self.version { - 1 => "collection.anki2", - 2 => "collection.anki21", - _ => "collection.anki21b", - } - } } fn check_collection(col_path: &Path) -> Result<()> { @@ -356,21 +318,22 @@ fn check_collection(col_path: &Path) -> Result<()> { } fn restore_media( + meta: Meta, mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, archive: &mut ZipArchive, media_folder: &str, ) -> Result<()> { - let media_file_names = extract_media_file_names(archive).ok_or(AnkiError::NotFound)?; + let media_file_names = extract_media_file_names(meta, archive)?; let mut count = 0; - for (archive_file_name, file_name) in media_file_names { + for (archive_file_name, file_name) in media_file_names.iter().enumerate() { count += 1; if count % 10 == 0 { progress_fn(ImportProgress::Media(count))?; } - if let Ok(mut zip_file) = archive.by_name(&archive_file_name) { - let file_path = Path::new(&media_folder).join(normalize_to_nfc(&file_name).as_ref()); + if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) { + let file_path = Path::new(&media_folder).join(normalize_to_nfc(file_name).as_ref()); let files_are_equal = fs::metadata(&file_path) .map(|metadata| metadata.len() == zip_file.size()) .unwrap_or_default(); @@ -392,15 +355,20 @@ fn restore_media( Ok(()) } -fn extract_media_file_names(archive: &mut ZipArchive) -> Option> { - archive - .by_name("media") - .ok() - .and_then(|mut file| { - let mut buf = Vec::new(); - file.read_to_end(&mut buf).ok().map(|_| buf) - }) - .and_then(|bytes| serde_json::from_slice(&bytes).ok()) +fn extract_media_file_names(meta: Meta, archive: &mut ZipArchive) -> Result> { + let mut file = archive.by_name("media")?; + let mut buf = Vec::new(); + if meta.zstd_compressed() { + copy_decode(file, &mut buf)?; + } else { + io::copy(&mut file, &mut buf)?; + } + if meta.media_list_is_hashmap() { + let map: HashMap<&str, String> = serde_json::from_slice(&buf)?; + Ok(map.into_iter().map(|(_k, v)| v).collect()) + } else { + serde_json::from_slice(&buf).map_err(Into::into) + } } fn copy_collection( @@ -411,7 +379,7 @@ fn copy_collection( let mut file = archive .by_name(meta.collection_name()) .map_err(|_| 
AnkiError::ImportError(ImportError::Corrupt))?;
-    if meta.version < 3 {
+    if !meta.zstd_compressed() {
         io::copy(&mut file, writer)?;
     } else {
         copy_decode(file, writer)?;
diff --git a/rslib/src/collection/exporting.rs b/rslib/src/collection/exporting.rs
new file mode 100644
index 000000000..9b1a6a133
--- /dev/null
+++ b/rslib/src/collection/exporting.rs
@@ -0,0 +1,291 @@
+// Copyright: Ankitects Pty Ltd and contributors
+// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
+
+use std::{
+    collections::HashMap,
+    fs::{read_dir, DirEntry, File},
+    io::{self, Read, Write},
+    path::{Path, PathBuf},
+};
+
+use serde_derive::{Deserialize, Serialize};
+use tempfile::NamedTempFile;
+use zip::{write::FileOptions, CompressionMethod, ZipWriter};
+use zstd::{
+    stream::{raw::Encoder as RawEncoder, zio::Writer},
+    Encoder,
+};
+
+use crate::{collection::CollectionBuilder, prelude::*, text::normalize_to_nfc};
+
+/// Bump if making changes that break restoring on older releases.
+pub const PACKAGE_VERSION: u8 = 3;
+const COLLECTION_NAME: &str = "collection.anki21b";
+const COLLECTION_NAME_V1: &str = "collection.anki2";
+const COLLECTION_NAME_V2: &str = "collection.anki21";
+/// Enable multithreaded compression if over this size. For smaller files,
+/// multithreading makes things slower, and in initial tests, the crossover
+/// point was somewhere between 1MB and 10MB on a many-core system.
+const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024;
+
+#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)]
+#[serde(default)]
+pub(super) struct Meta {
+    #[serde(rename = "ver")]
+    pub(super) version: u8,
+}
+
+impl Meta {
+    pub(super) fn new() -> Self {
+        Self {
+            version: PACKAGE_VERSION,
+        }
+    }
+
+    pub(super) fn new_v2() -> Self {
+        Self { version: 2 }
+    }
+
+    pub(super) fn collection_name(&self) -> &'static str {
+        match self.version {
+            1 => COLLECTION_NAME_V1,
+            2 => COLLECTION_NAME_V2,
+            _ => COLLECTION_NAME,
+        }
+    }
+
+    pub(super) fn zstd_compressed(&self) -> bool {
+        self.version >= 3
+    }
+
+    pub(super) fn media_list_is_hashmap(&self) -> bool {
+        self.version < 3
+    }
+}
+
+pub fn export_collection_file(
+    out_path: impl AsRef<Path>,
+    col_path: impl AsRef<Path>,
+    media_dir: Option<PathBuf>,
+    legacy: bool,
+    tr: &I18n,
+    progress_fn: impl FnMut(usize),
+) -> Result<()> {
+    let meta = if legacy { Meta::new_v2() } else { Meta::new() };
+    let mut col_file = File::open(col_path)?;
+    let col_size = col_file.metadata()?.len() as usize;
+    export_collection(
+        meta,
+        out_path,
+        &mut col_file,
+        col_size,
+        media_dir,
+        tr,
+        progress_fn,
+    )
+}
+
+pub(crate) fn export_collection_data(
+    out_path: impl AsRef<Path>,
+    mut col_data: &[u8],
+    tr: &I18n,
+) -> Result<()> {
+    let col_size = col_data.len();
+    export_collection(
+        Meta::new(),
+        out_path,
+        &mut col_data,
+        col_size,
+        None,
+        tr,
+        |_| (),
+    )
+}
+
+fn export_collection(
+    meta: Meta,
+    out_path: impl AsRef<Path>,
+    col: &mut impl Read,
+    col_size: usize,
+    media_dir: Option<PathBuf>,
+    tr: &I18n,
+    progress_fn: impl FnMut(usize),
+) -> Result<()> {
+    let out_file = File::create(&out_path)?;
+    let mut zip = ZipWriter::new(out_file);
+
+    zip.start_file("meta", file_options_stored())?;
+    zip.write_all(serde_json::to_string(&meta).unwrap().as_bytes())?;
+    write_collection(meta, &mut zip, col, col_size)?;
+    write_dummy_collection(&mut zip, tr)?;
+    write_media(meta, &mut zip, media_dir, progress_fn)?;
+    zip.finish()?;
+
+    Ok(())
+}
+
+fn file_options_stored() -> FileOptions {
+    FileOptions::default().compression_method(CompressionMethod::Stored)
+}
+
+fn write_collection(
+    meta: Meta,
+    zip: &mut ZipWriter<File>,
+    col: &mut impl Read,
+    size: usize,
+) -> Result<()> {
+    if meta.zstd_compressed() {
+        zip.start_file(meta.collection_name(), file_options_stored())?;
+        zstd_copy(col, zip, size)?;
+    } else {
+        zip.start_file(meta.collection_name(), FileOptions::default())?;
+        io::copy(col, zip)?;
+    }
+    Ok(())
+}
+
+fn write_dummy_collection(zip: &mut ZipWriter<File>, tr: &I18n) -> Result<()> {
+    let mut tempfile = create_dummy_collection_file(tr)?;
+    zip.start_file(COLLECTION_NAME_V1, file_options_stored())?;
+    io::copy(&mut tempfile, zip)?;
+
+    Ok(())
+}
+
+fn create_dummy_collection_file(tr: &I18n) -> Result<NamedTempFile> {
+    let tempfile = NamedTempFile::new()?;
+    let mut dummy_col = CollectionBuilder::new(tempfile.path()).build()?;
+    dummy_col.add_dummy_note(tr)?;
+    dummy_col
+        .storage
+        .db
+        .execute_batch("pragma page_size=512; pragma journal_mode=delete; vacuum;")?;
+    dummy_col.close(true)?;
+
+    Ok(tempfile)
+}
+
+impl Collection {
+    fn add_dummy_note(&mut self, tr: &I18n) -> Result<()> {
+        let notetype = self.get_notetype_by_name("basic")?.unwrap();
+        let mut note = notetype.new_note();
+        note.set_field(0, tr.exporting_colpkg_too_new())?;
+        self.add_note(&mut note, DeckId(1))?;
+        Ok(())
+    }
+}
+
+/// Copy contents of reader into writer, compressing as we copy.
+fn zstd_copy(reader: &mut impl Read, writer: &mut impl Write, size: usize) -> Result<()> {
+    let mut encoder = Encoder::new(writer, 0)?;
+    if size > MULTITHREAD_MIN_BYTES {
+        encoder.multithread(num_cpus::get() as u32)?;
+    }
+    io::copy(reader, &mut encoder)?;
+    encoder.finish()?;
+    Ok(())
+}
+
+fn write_media(
+    meta: Meta,
+    zip: &mut ZipWriter<File>,
+    media_dir: Option<PathBuf>,
+    progress_fn: impl FnMut(usize),
+) -> Result<()> {
+    let mut media_names = vec![];
+
+    if let Some(media_dir) = media_dir {
+        write_media_files(meta, zip, &media_dir, &mut media_names, progress_fn)?;
+    }
+
+    write_media_map(meta, &media_names, zip)?;
+
+    Ok(())
+}
+
+fn write_media_map(meta: Meta, media_names: &[String], zip: &mut ZipWriter<File>) -> Result<()> {
+    zip.start_file("media", file_options_stored())?;
+    let json_bytes = if meta.media_list_is_hashmap() {
+        let map: HashMap<String, &str> = media_names
+            .iter()
+            .enumerate()
+            .map(|(k, v)| (k.to_string(), v.as_str()))
+            .collect();
+        serde_json::to_vec(&map)?
+    } else {
+        serde_json::to_vec(media_names)?
+    };
+    let size = json_bytes.len();
+    let mut cursor = std::io::Cursor::new(json_bytes);
+    if meta.zstd_compressed() {
+        zstd_copy(&mut cursor, zip, size)?;
+    } else {
+        io::copy(&mut cursor, zip)?;
+    }
+    Ok(())
+}
+
+fn write_media_files(
+    meta: Meta,
+    zip: &mut ZipWriter<File>,
+    dir: &Path,
+    names: &mut Vec<String>,
+    mut progress_fn: impl FnMut(usize),
+) -> Result<()> {
+    let mut writer = MediaFileWriter::new(meta);
+    let mut index = 0;
+    for entry in read_dir(dir)? {
+        let entry = entry?;
+        if !entry.metadata()?.is_file() {
+            continue;
+        }
+        progress_fn(index);
+        names.push(normalized_unicode_file_name(&entry)?);
+        zip.start_file(index.to_string(), file_options_stored())?;
+        writer = writer.write(&mut File::open(entry.path())?, zip)?;
+        // can't enumerate(), as we skip folders
+        index += 1;
+    }
+
+    Ok(())
+}
+
+fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
+    entry
+        .file_name()
+        .to_str()
+        .map(|name| normalize_to_nfc(name).into())
+        .ok_or_else(|| {
+            AnkiError::IoError(format!(
+                "non-unicode file name: {}",
+                entry.file_name().to_string_lossy()
+            ))
+        })
+}
+
+/// Writes media files while compressing according to the targeted version.
+/// If compressing, the encoder is reused to optimize for repeated calls.
+struct MediaFileWriter(Option<RawEncoder<'static>>);
+
+impl MediaFileWriter {
+    fn new(meta: Meta) -> Self {
+        Self(
+            meta.zstd_compressed()
+                .then(|| RawEncoder::with_dictionary(0, &[]).unwrap()),
+        )
+    }
+
+    fn write(mut self, reader: &mut impl Read, writer: &mut impl Write) -> Result<Self> {
+        // take [self] by value to prevent it from being reused after an error
+        if let Some(encoder) = self.0.take() {
+            let mut encoder_writer = Writer::new(writer, encoder);
+            io::copy(reader, &mut encoder_writer)?;
+            encoder_writer.finish()?;
+            self.0 = Some(encoder_writer.into_inner().1);
+        } else {
+            io::copy(reader, writer)?;
+        }
+
+        Ok(self)
+    }
+}
diff --git a/rslib/src/collection/mod.rs b/rslib/src/collection/mod.rs
index b5d8fa4f2..27e3aabb5 100644
--- a/rslib/src/collection/mod.rs
+++ b/rslib/src/collection/mod.rs
@@ -2,6 +2,7 @@
 // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
 
 pub mod backup;
+pub mod exporting;
 pub(crate) mod timestamps;
 mod transact;
 pub(crate) mod undo;
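Notes on the new package layout (not part of the patch itself): a version-3 .colpkg is a zip whose entries are stored uncompressed at the zip layer. It holds a plain-JSON `meta` entry ({"ver": 3}), a zstd-compressed collection.anki21b, a dummy collection.anki2 containing a single note asking older clients to update, and a `media` entry with a zstd-compressed JSON array of file names; each media file is stored under its index in that array, individually zstd-compressed. The sketch below shows how a consumer might read the collection back out, mirroring the restore path in backup.rs. It is a simplified illustration: the backup.colpkg/collection.out paths are made up, the version-0 fallback that probes for collection.anki21 is omitted, and the zip, zstd, serde and serde_json crates are assumed as dependencies.

use std::{fs::File, io};

use serde::Deserialize;
use zip::ZipArchive;
use zstd::stream::copy_decode;

#[derive(Deserialize, Default)]
#[serde(default)]
struct Meta {
    #[serde(rename = "ver")]
    version: u8,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut archive = ZipArchive::new(File::open("backup.colpkg")?)?;

    // "meta" is plain JSON; packages written before the meta file existed
    // simply lack the entry and default to version 0.
    let meta: Meta = match archive.by_name("meta") {
        Ok(file) => serde_json::from_reader(file)?,
        Err(_) => Meta::default(),
    };

    // Version 3 stores a zstd-compressed collection.anki21b; earlier versions
    // store collection.anki21 (v2) or collection.anki2 (v1) uncompressed.
    let entry_name = if meta.version >= 3 {
        "collection.anki21b"
    } else if meta.version == 2 {
        "collection.anki21"
    } else {
        "collection.anki2"
    };

    let mut entry = archive.by_name(entry_name)?;
    let mut out = File::create("collection.out")?;
    if meta.version >= 3 {
        copy_decode(&mut entry, &mut out)?;
    } else {
        io::copy(&mut entry, &mut out)?;
    }
    Ok(())
}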
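Similarly, a sketch of how the new Rust entry point is meant to be driven, based on Backend::export_collection() above: the collection file must already be closed (the Python wrapper calls close_for_full_sync() before issuing the RPC), media is optional, and legacy = true writes a version-2 package that pre-2.1.50 clients can still import. The helper name and the paths are illustrative only, and the snippet assumes it lives inside rslib so that crate::prelude::* supplies Result and I18n.

use std::path::PathBuf;

use crate::{collection::exporting::export_collection_file, prelude::*};

// Hypothetical helper: package an already-closed collection into a .colpkg.
fn export_closed_collection(
    col_path: &str,
    media_dir: Option<PathBuf>,
    tr: &I18n,
) -> Result<()> {
    export_collection_file(
        "/tmp/export.colpkg", // out_path: where the package is written (illustrative)
        col_path,             // path to the closed collection.anki2 file
        media_dir,            // Some(media folder) to bundle media, None to skip it
        false,                // legacy: true would emit a v2 package for older clients
        tr,
        |count| println!("exported {} media files", count), // progress callback
    )
}

From the Python side the same flow is reached through the new ExportCollection RPC, with media progress surfaced via the new `exporting` field on Progress.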