Handle gaps in media in colpkg imports

Our old Python code also skipped a file number whenever it encountered a
directory, leaving gaps in the media numbering. The resulting colpkg
couldn't be imported with our new code, which rejected such gaps.
This commit is contained in:
Damien Elmes 2022-03-31 10:35:15 +10:00
parent 5b1fcccf33
commit 9d64afc7bc
3 changed files with 27 additions and 25 deletions

View file

@ -46,6 +46,11 @@ message MediaEntries {
string name = 1;
uint32 size = 2;
bytes sha1 = 3;
/// Legacy media maps may include gaps in the media list, so the original
/// file index (used as the entry's file name inside the zip) is recorded
/// when importing from a HashMap. This field is not set when exporting.
optional uint32 legacy_zip_filename = 255;
}
repeated MediaEntry entries = 1;

View file

@ -278,6 +278,7 @@ impl MediaEntry {
name: name.into(),
size: size.try_into().unwrap_or_default(),
sha1: sha1.into(),
legacy_zip_filename: None,
}
}
}

View file

@ -102,16 +102,22 @@ fn restore_media(
let media_entries = extract_media_entries(meta, archive)?;
std::fs::create_dir_all(media_folder)?;
for (archive_file_name, entry) in media_entries.iter().enumerate() {
if archive_file_name % 10 == 0 {
progress_fn(ImportProgress::Media(archive_file_name))?;
for (entry_idx, entry) in media_entries.iter().enumerate() {
if entry_idx % 10 == 0 {
progress_fn(ImportProgress::Media(entry_idx))?;
}
if let Ok(mut zip_file) = archive.by_name(&archive_file_name.to_string()) {
let zip_filename = entry
.legacy_zip_filename
.map(|n| n as usize)
.unwrap_or(entry_idx)
.to_string();
if let Ok(mut zip_file) = archive.by_name(&zip_filename) {
maybe_restore_media_file(meta, media_folder, entry, &mut zip_file)?;
} else {
return Err(AnkiError::invalid_input(&format!(
"{archive_file_name} missing from archive"
"{zip_filename} missing from archive"
)));
}
}
@ -203,27 +209,17 @@ fn extract_media_entries(meta: &Meta, archive: &mut ZipArchive<File>) -> Result<
}
if meta.media_list_is_hashmap() {
let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
let mut entries: Vec<(usize, String)> = map
.into_iter()
.map(|(k, v)| (k.parse().unwrap_or_default(), v))
.collect();
entries.sort_unstable();
// any gaps in the file numbers would lead to media being imported under the wrong name
if entries
.iter()
.enumerate()
.any(|(idx1, (idx2, _))| idx1 != *idx2)
{
return Err(AnkiError::ImportError(ImportError::Corrupt));
}
Ok(entries
.into_iter()
.map(|(_str_idx, name)| MediaEntry {
name,
size: 0,
sha1: vec![],
map.into_iter()
.map(|(idx_str, name)| {
let idx: u32 = idx_str.parse()?;
Ok(MediaEntry {
name,
size: 0,
sha1: vec![],
legacy_zip_filename: Some(idx),
})
})
.collect())
.collect()
} else {
let entries: MediaEntries = Message::decode(&*buf)?;
Ok(entries.entries)