From 9d64afc7bc92e1acee3af394ff997f91d3fe27bf Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Thu, 31 Mar 2022 10:35:15 +1000 Subject: [PATCH] Handle gaps in media in colpkg imports Our old Python code was also skipping numbers when it encountered a directory, leading to a colpkg that couldn't be imported with our new code. --- proto/anki/import_export.proto | 5 ++ .../import_export/package/colpkg/export.rs | 1 + .../import_export/package/colpkg/import.rs | 46 +++++++++---------- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto index 94711c31a..ea8bfe8ad 100644 --- a/proto/anki/import_export.proto +++ b/proto/anki/import_export.proto @@ -46,6 +46,11 @@ message MediaEntries { string name = 1; uint32 size = 2; bytes sha1 = 3; + + /// Legacy media maps may include gaps in the media list, so the original + /// file index is recorded when importing from a HashMap. This field is not + /// set when exporting. + optional uint32 legacy_zip_filename = 255; } repeated MediaEntry entries = 1; diff --git a/rslib/src/import_export/package/colpkg/export.rs b/rslib/src/import_export/package/colpkg/export.rs index e2247bc83..6dc5de69f 100644 --- a/rslib/src/import_export/package/colpkg/export.rs +++ b/rslib/src/import_export/package/colpkg/export.rs @@ -278,6 +278,7 @@ impl MediaEntry { name: name.into(), size: size.try_into().unwrap_or_default(), sha1: sha1.into(), + legacy_zip_filename: None, } } } diff --git a/rslib/src/import_export/package/colpkg/import.rs b/rslib/src/import_export/package/colpkg/import.rs index 7664637a4..bd6a6fe00 100644 --- a/rslib/src/import_export/package/colpkg/import.rs +++ b/rslib/src/import_export/package/colpkg/import.rs @@ -102,16 +102,22 @@ fn restore_media( let media_entries = extract_media_entries(meta, archive)?; std::fs::create_dir_all(media_folder)?; - for (archive_file_name, entry) in media_entries.iter().enumerate() { - if archive_file_name % 10 == 0 { - progress_fn(ImportProgress::Media(archive_file_name))?; + for (entry_idx, entry) in media_entries.iter().enumerate() { + if entry_idx % 10 == 0 { + progress_fn(ImportProgress::Media(entry_idx))?; } - if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) { + let zip_filename = entry + .legacy_zip_filename + .map(|n| n as usize) + .unwrap_or(entry_idx) + .to_string(); + + if let Ok(mut zip_file) = archive.by_name(&zip_filename) { maybe_restore_media_file(meta, media_folder, entry, &mut zip_file)?; } else { return Err(AnkiError::invalid_input(&format!( - "{archive_file_name} missing from archive" + "{zip_filename} missing from archive" ))); } } @@ -203,27 +209,17 @@ fn extract_media_entries(meta: &Meta, archive: &mut ZipArchive) -> Result< } if meta.media_list_is_hashmap() { let map: HashMap<&str, String> = serde_json::from_slice(&buf)?; - let mut entries: Vec<(usize, String)> = map - .into_iter() - .map(|(k, v)| (k.parse().unwrap_or_default(), v)) - .collect(); - entries.sort_unstable(); - // any gaps in the file numbers would lead to media being imported under the wrong name - if entries - .iter() - .enumerate() - .any(|(idx1, (idx2, _))| idx1 != *idx2) - { - return Err(AnkiError::ImportError(ImportError::Corrupt)); - } - Ok(entries - .into_iter() - .map(|(_str_idx, name)| MediaEntry { - name, - size: 0, - sha1: vec![], + map.into_iter() + .map(|(idx_str, name)| { + let idx: u32 = idx_str.parse()?; + Ok(MediaEntry { + name, + size: 0, + sha1: vec![], + legacy_zip_filename: Some(idx), + }) }) - .collect()) + .collect() } else { let entries: MediaEntries = Message::decode(&*buf)?; Ok(entries.entries)