diff --git a/rslib/src/import_export/package/apkg/import.rs b/rslib/src/import_export/package/apkg/import.rs index e4a07ef85..04a119b24 100644 --- a/rslib/src/import_export/package/apkg/import.rs +++ b/rslib/src/import_export/package/apkg/import.rs @@ -18,7 +18,7 @@ use crate::{ collection::CollectionBuilder, import_export::{ gather::ExchangeData, - package::{colpkg::import::extract_media_entries, Meta}, + package::{media::extract_media_entries, Meta}, }, io::{atomic_rename, tempfile_in_parent_of}, prelude::*, diff --git a/rslib/src/import_export/package/colpkg/export.rs b/rslib/src/import_export/package/colpkg/export.rs index 002ebff43..45cd7a251 100644 --- a/rslib/src/import_export/package/colpkg/export.rs +++ b/rslib/src/import_export/package/colpkg/export.rs @@ -306,17 +306,6 @@ fn write_media_files( Ok(()) } -impl MediaEntry { - fn new(name: impl Into, size: impl TryInto, sha1: impl Into>) -> Self { - MediaEntry { - name: name.into(), - size: size.try_into().unwrap_or_default(), - sha1: sha1.into(), - legacy_zip_filename: None, - } - } -} - fn normalized_unicode_file_name(filename: &OsStr) -> Result { let filename = filename.to_str().ok_or_else(|| { AnkiError::IoError(format!( diff --git a/rslib/src/import_export/package/colpkg/import.rs b/rslib/src/import_export/package/colpkg/import.rs index 95b5383e7..e9a96d7ab 100644 --- a/rslib/src/import_export/package/colpkg/import.rs +++ b/rslib/src/import_export/package/colpkg/import.rs @@ -2,57 +2,25 @@ // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use std::{ - borrow::Cow, - collections::HashMap, - fs::{self, File}, - io::{self, Read, Write}, + fs::File, + io::{self, Write}, path::{Path, PathBuf}, }; -use prost::Message; use zip::{read::ZipFile, ZipArchive}; use zstd::{self, stream::copy_decode}; -use super::super::Version; use crate::{ collection::CollectionBuilder, error::ImportError, import_export::{ - package::{MediaEntries, MediaEntry, Meta}, + package::{media::extract_media_entries, MediaEntry, Meta}, ImportProgress, }, - io::{atomic_rename, filename_is_safe, tempfile_in_parent_of}, - media::files::normalize_filename, + io::{atomic_rename, tempfile_in_parent_of}, prelude::*, }; -impl Meta { - /// Extracts meta data from an archive and checks if its version is supported. - pub(super) fn from_archive(archive: &mut ZipArchive) -> Result { - let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| { - let mut buf = vec![]; - meta_file.read_to_end(&mut buf).ok()?; - Some(buf) - }); - let meta = if let Some(bytes) = meta_bytes { - let meta: Meta = Message::decode(&*bytes)?; - if meta.version() == Version::Unknown { - return Err(AnkiError::ImportError(ImportError::TooNew)); - } - meta - } else { - Meta { - version: if archive.by_name("collection.anki21").is_ok() { - Version::Legacy2 - } else { - Version::Legacy1 - } as i32, - } - }; - Ok(meta) - } -} - pub fn import_colpkg( colpkg_path: &str, target_col_path: &str, @@ -131,7 +99,7 @@ fn maybe_restore_media_file( entry: &MediaEntry, zip_file: &mut ZipFile, ) -> Result<()> { - let file_path = entry.safe_normalized_file_path(meta, media_folder)?; + let file_path = entry.file_path(media_folder); let already_exists = entry.is_equal_to(meta, zip_file, &file_path); if !already_exists { restore_media_file(meta, zip_file, &file_path)?; @@ -153,70 +121,6 @@ fn restore_media_file(meta: &Meta, zip_file: &mut ZipFile, path: &Path) -> Resul atomic_rename(tempfile, path, false) } -impl MediaEntry { - fn safe_normalized_file_path(&self, meta: &Meta, media_folder: &Path) -> Result { - if !filename_is_safe(&self.name) { - return Err(AnkiError::ImportError(ImportError::Corrupt)); - } - let normalized = maybe_normalizing(&self.name, meta.strict_media_checks())?; - Ok(media_folder.join(normalized.as_ref())) - } - - fn is_equal_to(&self, meta: &Meta, self_zipped: &ZipFile, other_path: &Path) -> bool { - // TODO: checks hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147) - let self_size = if meta.media_list_is_hashmap() { - self_zipped.size() - } else { - self.size as u64 - }; - fs::metadata(other_path) - .map(|metadata| metadata.len() as u64 == self_size) - .unwrap_or_default() - } -} - -/// - If strict is true, return an error if not normalized. -/// - If false, return the normalized version. -fn maybe_normalizing(name: &str, strict: bool) -> Result> { - let normalized = normalize_filename(name); - if strict && matches!(normalized, Cow::Owned(_)) { - // exporting code should have checked this - Err(AnkiError::ImportError(ImportError::Corrupt)) - } else { - Ok(normalized) - } -} - -pub(crate) fn extract_media_entries( - meta: &Meta, - archive: &mut ZipArchive, -) -> Result> { - let mut file = archive.by_name("media")?; - let mut buf = Vec::new(); - if meta.zstd_compressed() { - copy_decode(file, &mut buf)?; - } else { - io::copy(&mut file, &mut buf)?; - } - if meta.media_list_is_hashmap() { - let map: HashMap<&str, String> = serde_json::from_slice(&buf)?; - map.into_iter() - .map(|(idx_str, name)| { - let idx: u32 = idx_str.parse()?; - Ok(MediaEntry { - name, - size: 0, - sha1: vec![], - legacy_zip_filename: Some(idx), - }) - }) - .collect() - } else { - let entries: MediaEntries = Message::decode(&*buf)?; - Ok(entries.entries) - } -} - fn copy_collection( archive: &mut ZipArchive, writer: &mut impl Write, @@ -233,14 +137,3 @@ fn copy_collection( Ok(()) } - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn normalization() { - assert_eq!(&maybe_normalizing("con", false).unwrap(), "con_"); - assert!(&maybe_normalizing("con", true).is_err()); - } -} diff --git a/rslib/src/import_export/package/media.rs b/rslib/src/import_export/package/media.rs new file mode 100644 index 000000000..1ced69fbb --- /dev/null +++ b/rslib/src/import_export/package/media.rs @@ -0,0 +1,134 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::{ + borrow::Cow, + collections::HashMap, + fs::{self, File}, + io, + path::{Path, PathBuf}, +}; + +use prost::Message; +use zip::{read::ZipFile, ZipArchive}; +use zstd::stream::copy_decode; + +use super::{MediaEntries, MediaEntry, Meta}; +use crate::{ + error::ImportError, io::filename_is_safe, media::files::normalize_filename, prelude::*, +}; + +impl MediaEntry { + pub(super) fn new( + name: impl Into, + size: impl TryInto, + sha1: impl Into>, + ) -> Self { + MediaEntry { + name: name.into(), + size: size.try_into().unwrap_or_default(), + sha1: sha1.into(), + legacy_zip_filename: None, + } + } + + pub(super) fn from_legacy(legacy_entry: (&str, String)) -> Result { + let idx: u32 = legacy_entry.0.parse()?; + let name = match safe_normalized_file_name(&legacy_entry.1)? { + Cow::Owned(new_name) => new_name, + Cow::Borrowed(_) => legacy_entry.1, + }; + Ok(Self { + name, + size: 0, + sha1: vec![], + legacy_zip_filename: Some(idx), + }) + } + + pub(super) fn file_path(&self, media_folder: &Path) -> PathBuf { + media_folder.join(&self.name) + } + + pub(super) fn is_equal_to( + &self, + meta: &Meta, + self_zipped: &ZipFile, + other_path: &Path, + ) -> bool { + // TODO: check hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147) + let self_size = if meta.media_list_is_hashmap() { + self_zipped.size() + } else { + self.size as u64 + }; + fs::metadata(other_path) + .map(|metadata| metadata.len() as u64 == self_size) + .unwrap_or_default() + } +} + +pub(super) fn extract_media_entries( + meta: &Meta, + archive: &mut ZipArchive, +) -> Result> { + let media_list_data = get_media_list_data(archive, meta)?; + if meta.media_list_is_hashmap() { + let map: HashMap<&str, String> = serde_json::from_slice(&media_list_data)?; + map.into_iter().map(MediaEntry::from_legacy).collect() + } else { + MediaEntries::decode_checked(&media_list_data).map(|m| m.entries) + } +} + +fn safe_normalized_file_name(name: &str) -> Result> { + if !filename_is_safe(name) { + Err(AnkiError::ImportError(ImportError::Corrupt)) + } else { + Ok(normalize_filename(name)) + } +} + +fn get_media_list_data(archive: &mut ZipArchive, meta: &Meta) -> Result> { + let mut file = archive.by_name("media")?; + let mut buf = Vec::new(); + if meta.zstd_compressed() { + copy_decode(file, &mut buf)?; + } else { + io::copy(&mut file, &mut buf)?; + } + Ok(buf) +} + +impl MediaEntries { + fn decode_checked(buf: &[u8]) -> Result { + let entries: Self = Message::decode(buf)?; + for entry in &entries.entries { + if matches!(safe_normalized_file_name(&entry.name)?, Cow::Owned(_)) { + return Err(AnkiError::ImportError(ImportError::Corrupt)); + } + } + Ok(entries) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn normalization() { + // legacy entries get normalized on deserialisation + let entry = MediaEntry::from_legacy(("1", "con".to_owned())).unwrap(); + assert_eq!(entry.name, "con_"); + + // new-style entries should have been normalized on export + let mut entries = Vec::new(); + MediaEntries { + entries: vec![MediaEntry::new("con", 0, Vec::new())], + } + .encode(&mut entries) + .unwrap(); + assert!(MediaEntries::decode_checked(&entries).is_err()); + } +} diff --git a/rslib/src/import_export/package/meta.rs b/rslib/src/import_export/package/meta.rs index c2ac4e80c..b8ca9a8ca 100644 --- a/rslib/src/import_export/package/meta.rs +++ b/rslib/src/import_export/package/meta.rs @@ -1,7 +1,13 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +use std::{fs::File, io::Read}; + +use prost::Message; +use zip::ZipArchive; + pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta}; +use crate::{error::ImportError, prelude::*}; impl Version { pub(super) fn collection_filename(&self) -> &'static str { @@ -27,6 +33,31 @@ impl Meta { } } + /// Extracts meta data from an archive and checks if its version is supported. + pub(super) fn from_archive(archive: &mut ZipArchive) -> Result { + let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| { + let mut buf = vec![]; + meta_file.read_to_end(&mut buf).ok()?; + Some(buf) + }); + let meta = if let Some(bytes) = meta_bytes { + let meta: Meta = Message::decode(&*bytes)?; + if meta.version() == Version::Unknown { + return Err(AnkiError::ImportError(ImportError::TooNew)); + } + meta + } else { + Meta { + version: if archive.by_name("collection.anki21").is_ok() { + Version::Legacy2 + } else { + Version::Legacy1 + } as i32, + } + }; + Ok(meta) + } + pub(super) fn collection_filename(&self) -> &'static str { self.version().collection_filename() } @@ -39,10 +70,6 @@ impl Meta { self.is_legacy() } - pub(super) fn strict_media_checks(&self) -> bool { - !self.is_legacy() - } - fn is_legacy(&self) -> bool { matches!(self.version(), Version::Legacy1 | Version::Legacy2) } diff --git a/rslib/src/import_export/package/mod.rs b/rslib/src/import_export/package/mod.rs index 9ce6c4d33..70b8bfb14 100644 --- a/rslib/src/import_export/package/mod.rs +++ b/rslib/src/import_export/package/mod.rs @@ -3,6 +3,7 @@ mod apkg; mod colpkg; +mod media; mod meta; pub(crate) use apkg::NoteMeta;