From 9d64afc7bc92e1acee3af394ff997f91d3fe27bf Mon Sep 17 00:00:00 2001
From: Damien Elmes <gpg@ankiweb.net>
Date: Thu, 31 Mar 2022 10:35:15 +1000
Subject: [PATCH] Handle gaps in media in colpkg imports

Our old Python code skipped a file number whenever it encountered a
directory, leaving gaps in the media map and producing a colpkg that
couldn't be imported with our new code.
---
 proto/anki/import_export.proto                |  5 ++
 .../import_export/package/colpkg/export.rs    |  1 +
 .../import_export/package/colpkg/import.rs    | 46 +++++++++----------
 3 files changed, 27 insertions(+), 25 deletions(-)
diff --git a/proto/anki/import_export.proto b/proto/anki/import_export.proto
index 94711c31a..ea8bfe8ad 100644
--- a/proto/anki/import_export.proto
+++ b/proto/anki/import_export.proto
@@ -46,6 +46,11 @@ message MediaEntries {
     string name = 1;
     uint32 size = 2;
     bytes sha1 = 3;
+
+    /// Legacy media maps may include gaps in the media list, so the original
+    /// file index is recorded when importing from a HashMap. This field is not
+    /// set when exporting.
+    optional uint32 legacy_zip_filename = 255;
   }
 
   repeated MediaEntry entries = 1;
diff --git a/rslib/src/import_export/package/colpkg/export.rs b/rslib/src/import_export/package/colpkg/export.rs
index e2247bc83..6dc5de69f 100644
--- a/rslib/src/import_export/package/colpkg/export.rs
+++ b/rslib/src/import_export/package/colpkg/export.rs
@@ -278,6 +278,7 @@ impl MediaEntry {
             name: name.into(),
             size: size.try_into().unwrap_or_default(),
             sha1: sha1.into(),
+            legacy_zip_filename: None,
         }
     }
 }
diff --git a/rslib/src/import_export/package/colpkg/import.rs b/rslib/src/import_export/package/colpkg/import.rs
index 7664637a4..bd6a6fe00 100644
--- a/rslib/src/import_export/package/colpkg/import.rs
+++ b/rslib/src/import_export/package/colpkg/import.rs
@@ -102,16 +102,22 @@ fn restore_media(
     let media_entries = extract_media_entries(meta, archive)?;
     std::fs::create_dir_all(media_folder)?;
 
-    for (archive_file_name, entry) in media_entries.iter().enumerate() {
-        if archive_file_name % 10 == 0 {
-            progress_fn(ImportProgress::Media(archive_file_name))?;
+    for (entry_idx, entry) in media_entries.iter().enumerate() {
+        if entry_idx % 10 == 0 {
+            progress_fn(ImportProgress::Media(entry_idx))?;
         }
 
-        if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
+        let zip_filename = entry
+            .legacy_zip_filename
+            .map(|n| n as usize)
+            .unwrap_or(entry_idx)
+            .to_string();
+
+        if let Ok(mut zip_file) = archive.by_name(&zip_filename) {
             maybe_restore_media_file(meta, media_folder, entry, &mut zip_file)?;
         } else {
             return Err(AnkiError::invalid_input(&format!(
-                "{archive_file_name} missing from archive"
+                "{zip_filename} missing from archive"
             )));
         }
     }
@@ -203,27 +209,17 @@ fn extract_media_entries(meta: &Meta, archive: &mut ZipArchive<File>) -> Result<
     }
     if meta.media_list_is_hashmap() {
         let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
-        let mut entries: Vec<(usize, String)> = map
-            .into_iter()
-            .map(|(k, v)| (k.parse().unwrap_or_default(), v))
-            .collect();
-        entries.sort_unstable();
-        // any gaps in the file numbers would lead to media being imported under the wrong name
-        if entries
-            .iter()
-            .enumerate()
-            .any(|(idx1, (idx2, _))| idx1 != *idx2)
-        {
-            return Err(AnkiError::ImportError(ImportError::Corrupt));
-        }
-        Ok(entries
-            .into_iter()
-            .map(|(_str_idx, name)| MediaEntry {
-                name,
-                size: 0,
-                sha1: vec![],
+        map.into_iter()
+            .map(|(idx_str, name)| {
+                let idx: u32 = idx_str.parse()?;
+                Ok(MediaEntry {
+                    name,
+                    size: 0,
+                    sha1: vec![],
+                    legacy_zip_filename: Some(idx),
+                })
             })
-            .collect())
+            .collect()
     } else {
         let entries: MediaEntries = Message::decode(&*buf)?;
         Ok(entries.entries)