diff --git a/rslib/src/import_export/package/apkg/import.rs b/rslib/src/import_export/package/apkg/import.rs index 04a119b24..05e691b20 100644 --- a/rslib/src/import_export/package/apkg/import.rs +++ b/rslib/src/import_export/package/apkg/import.rs @@ -81,7 +81,7 @@ impl Collection { fn build_media_map(archive: &mut ZipArchive) -> Result> { Ok(extract_media_entries(&Meta::new_legacy(), archive)? .into_iter() - .map(|entry| (entry.name, entry.legacy_zip_filename.unwrap().to_string())) + .map(|entry| (entry.name, entry.index.to_string())) .collect()) } diff --git a/rslib/src/import_export/package/colpkg/import.rs b/rslib/src/import_export/package/colpkg/import.rs index e9a96d7ab..2dbb49d95 100644 --- a/rslib/src/import_export/package/colpkg/import.rs +++ b/rslib/src/import_export/package/colpkg/import.rs @@ -14,7 +14,10 @@ use crate::{ collection::CollectionBuilder, error::ImportError, import_export::{ - package::{media::extract_media_entries, MediaEntry, Meta}, + package::{ + media::{extract_media_entries, SafeMediaEntry}, + Meta, + }, ImportProgress, }, io::{atomic_rename, tempfile_in_parent_of}, @@ -70,24 +73,11 @@ fn restore_media( let media_entries = extract_media_entries(meta, archive)?; std::fs::create_dir_all(media_folder)?; - for (entry_idx, entry) in media_entries.iter().enumerate() { - if entry_idx % 10 == 0 { - progress_fn(ImportProgress::Media(entry_idx))?; - } - - let zip_filename = entry - .legacy_zip_filename - .map(|n| n as usize) - .unwrap_or(entry_idx) - .to_string(); - - if let Ok(mut zip_file) = archive.by_name(&zip_filename) { - maybe_restore_media_file(meta, media_folder, entry, &mut zip_file)?; - } else { - return Err(AnkiError::invalid_input(&format!( - "{zip_filename} missing from archive" - ))); + for entry in &media_entries { + if entry.index % 10 == 0 { + progress_fn(ImportProgress::Media(entry.index))?; } + maybe_restore_media_file(meta, media_folder, archive, entry)?; } Ok(()) @@ -96,13 +86,14 @@ fn restore_media( fn maybe_restore_media_file( meta: &Meta, media_folder: &Path, - entry: &MediaEntry, - zip_file: &mut ZipFile, + archive: &mut ZipArchive, + entry: &SafeMediaEntry, ) -> Result<()> { let file_path = entry.file_path(media_folder); - let already_exists = entry.is_equal_to(meta, zip_file, &file_path); + let mut zip_file = entry.fetch_file(archive)?; + let already_exists = entry.is_equal_to(meta, &zip_file, &file_path); if !already_exists { - restore_media_file(meta, zip_file, &file_path)?; + restore_media_file(meta, &mut zip_file, &file_path)?; }; Ok(()) diff --git a/rslib/src/import_export/package/media.rs b/rslib/src/import_export/package/media.rs index 1ced69fbb..be25d1c77 100644 --- a/rslib/src/import_export/package/media.rs +++ b/rslib/src/import_export/package/media.rs @@ -18,6 +18,15 @@ use crate::{ error::ImportError, io::filename_is_safe, media::files::normalize_filename, prelude::*, }; +/// Like [MediaEntry], but with a safe filename and set zip filename. +pub(super) struct SafeMediaEntry { + pub(super) name: String, + pub(super) size: u32, + #[allow(dead_code)] + pub(super) sha1: [u8; 20], + pub(super) index: usize, +} + impl MediaEntry { pub(super) fn new( name: impl Into, @@ -31,9 +40,26 @@ impl MediaEntry { legacy_zip_filename: None, } } +} + +impl SafeMediaEntry { + pub(super) fn from_entry(enumerated: (usize, MediaEntry)) -> Result { + let (index, entry) = enumerated; + if let Ok(sha1) = entry.sha1.try_into() { + if !matches!(safe_normalized_file_name(&entry.name)?, Cow::Owned(_)) { + return Ok(Self { + name: entry.name, + size: entry.size, + sha1, + index, + }); + } + } + Err(AnkiError::ImportError(ImportError::Corrupt)) + } pub(super) fn from_legacy(legacy_entry: (&str, String)) -> Result { - let idx: u32 = legacy_entry.0.parse()?; + let zip_filename: usize = legacy_entry.0.parse()?; let name = match safe_normalized_file_name(&legacy_entry.1)? { Cow::Owned(new_name) => new_name, Cow::Borrowed(_) => legacy_entry.1, @@ -41,8 +67,8 @@ impl MediaEntry { Ok(Self { name, size: 0, - sha1: vec![], - legacy_zip_filename: Some(idx), + sha1: [0; 20], + index: zip_filename, }) } @@ -50,6 +76,12 @@ impl MediaEntry { media_folder.join(&self.name) } + pub(super) fn fetch_file<'a>(&self, archive: &'a mut ZipArchive) -> Result> { + archive + .by_name(&self.index.to_string()) + .map_err(|_| AnkiError::invalid_input(&format!("{} missing from archive", self.index))) + } + pub(super) fn is_equal_to( &self, meta: &Meta, @@ -71,13 +103,13 @@ impl MediaEntry { pub(super) fn extract_media_entries( meta: &Meta, archive: &mut ZipArchive, -) -> Result> { +) -> Result> { let media_list_data = get_media_list_data(archive, meta)?; if meta.media_list_is_hashmap() { let map: HashMap<&str, String> = serde_json::from_slice(&media_list_data)?; - map.into_iter().map(MediaEntry::from_legacy).collect() + map.into_iter().map(SafeMediaEntry::from_legacy).collect() } else { - MediaEntries::decode_checked(&media_list_data).map(|m| m.entries) + MediaEntries::decode_safe_entries(&media_list_data) } } @@ -101,14 +133,14 @@ fn get_media_list_data(archive: &mut ZipArchive, meta: &Meta) -> Result Result { + fn decode_safe_entries(buf: &[u8]) -> Result> { let entries: Self = Message::decode(buf)?; - for entry in &entries.entries { - if matches!(safe_normalized_file_name(&entry.name)?, Cow::Owned(_)) { - return Err(AnkiError::ImportError(ImportError::Corrupt)); - } - } - Ok(entries) + entries + .entries + .into_iter() + .enumerate() + .map(SafeMediaEntry::from_entry) + .collect() } } @@ -119,7 +151,7 @@ mod test { #[test] fn normalization() { // legacy entries get normalized on deserialisation - let entry = MediaEntry::from_legacy(("1", "con".to_owned())).unwrap(); + let entry = SafeMediaEntry::from_legacy(("1", "con".to_owned())).unwrap(); assert_eq!(entry.name, "con_"); // new-style entries should have been normalized on export @@ -129,6 +161,6 @@ mod test { } .encode(&mut entries) .unwrap(); - assert!(MediaEntries::decode_checked(&entries).is_err()); + assert!(MediaEntries::decode_safe_entries(&entries).is_err()); } }