mirror of
https://github.com/ankitects/anki.git
synced 2025-09-21 15:32:23 -04:00
Refactor import-export/package
- Move media and meta code into appropriate modules.
- Normalize/check for normalization when deserializing media entries.
This commit is contained in:
parent
80dc3ae99e
commit
efde7c7acc
6 changed files with 172 additions and 128 deletions
|
@ -18,7 +18,7 @@ use crate::{
|
||||||
collection::CollectionBuilder,
|
collection::CollectionBuilder,
|
||||||
import_export::{
|
import_export::{
|
||||||
gather::ExchangeData,
|
gather::ExchangeData,
|
||||||
package::{colpkg::import::extract_media_entries, Meta},
|
package::{media::extract_media_entries, Meta},
|
||||||
},
|
},
|
||||||
io::{atomic_rename, tempfile_in_parent_of},
|
io::{atomic_rename, tempfile_in_parent_of},
|
||||||
prelude::*,
|
prelude::*,
|
||||||
|
|
|
@ -306,17 +306,6 @@ fn write_media_files(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MediaEntry {
|
|
||||||
fn new(name: impl Into<String>, size: impl TryInto<u32>, sha1: impl Into<Vec<u8>>) -> Self {
|
|
||||||
MediaEntry {
|
|
||||||
name: name.into(),
|
|
||||||
size: size.try_into().unwrap_or_default(),
|
|
||||||
sha1: sha1.into(),
|
|
||||||
legacy_zip_filename: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> {
|
fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> {
|
||||||
let filename = filename.to_str().ok_or_else(|| {
|
let filename = filename.to_str().ok_or_else(|| {
|
||||||
AnkiError::IoError(format!(
|
AnkiError::IoError(format!(
|
||||||
|
|
|
@ -2,57 +2,25 @@
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
borrow::Cow,
|
fs::File,
|
||||||
collections::HashMap,
|
io::{self, Write},
|
||||||
fs::{self, File},
|
|
||||||
io::{self, Read, Write},
|
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
};
|
};
|
||||||
|
|
||||||
use prost::Message;
|
|
||||||
use zip::{read::ZipFile, ZipArchive};
|
use zip::{read::ZipFile, ZipArchive};
|
||||||
use zstd::{self, stream::copy_decode};
|
use zstd::{self, stream::copy_decode};
|
||||||
|
|
||||||
use super::super::Version;
|
|
||||||
use crate::{
|
use crate::{
|
||||||
collection::CollectionBuilder,
|
collection::CollectionBuilder,
|
||||||
error::ImportError,
|
error::ImportError,
|
||||||
import_export::{
|
import_export::{
|
||||||
package::{MediaEntries, MediaEntry, Meta},
|
package::{media::extract_media_entries, MediaEntry, Meta},
|
||||||
ImportProgress,
|
ImportProgress,
|
||||||
},
|
},
|
||||||
io::{atomic_rename, filename_is_safe, tempfile_in_parent_of},
|
io::{atomic_rename, tempfile_in_parent_of},
|
||||||
media::files::normalize_filename,
|
|
||||||
prelude::*,
|
prelude::*,
|
||||||
};
|
};
|
||||||
|
|
||||||
impl Meta {
|
|
||||||
/// Extracts meta data from an archive and checks if its version is supported.
|
|
||||||
pub(super) fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
|
|
||||||
let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| {
|
|
||||||
let mut buf = vec![];
|
|
||||||
meta_file.read_to_end(&mut buf).ok()?;
|
|
||||||
Some(buf)
|
|
||||||
});
|
|
||||||
let meta = if let Some(bytes) = meta_bytes {
|
|
||||||
let meta: Meta = Message::decode(&*bytes)?;
|
|
||||||
if meta.version() == Version::Unknown {
|
|
||||||
return Err(AnkiError::ImportError(ImportError::TooNew));
|
|
||||||
}
|
|
||||||
meta
|
|
||||||
} else {
|
|
||||||
Meta {
|
|
||||||
version: if archive.by_name("collection.anki21").is_ok() {
|
|
||||||
Version::Legacy2
|
|
||||||
} else {
|
|
||||||
Version::Legacy1
|
|
||||||
} as i32,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
Ok(meta)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn import_colpkg(
|
pub fn import_colpkg(
|
||||||
colpkg_path: &str,
|
colpkg_path: &str,
|
||||||
target_col_path: &str,
|
target_col_path: &str,
|
||||||
|
@ -131,7 +99,7 @@ fn maybe_restore_media_file(
|
||||||
entry: &MediaEntry,
|
entry: &MediaEntry,
|
||||||
zip_file: &mut ZipFile,
|
zip_file: &mut ZipFile,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let file_path = entry.safe_normalized_file_path(meta, media_folder)?;
|
let file_path = entry.file_path(media_folder);
|
||||||
let already_exists = entry.is_equal_to(meta, zip_file, &file_path);
|
let already_exists = entry.is_equal_to(meta, zip_file, &file_path);
|
||||||
if !already_exists {
|
if !already_exists {
|
||||||
restore_media_file(meta, zip_file, &file_path)?;
|
restore_media_file(meta, zip_file, &file_path)?;
|
||||||
|
@ -153,70 +121,6 @@ fn restore_media_file(meta: &Meta, zip_file: &mut ZipFile, path: &Path) -> Resul
|
||||||
atomic_rename(tempfile, path, false)
|
atomic_rename(tempfile, path, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MediaEntry {
|
|
||||||
fn safe_normalized_file_path(&self, meta: &Meta, media_folder: &Path) -> Result<PathBuf> {
|
|
||||||
if !filename_is_safe(&self.name) {
|
|
||||||
return Err(AnkiError::ImportError(ImportError::Corrupt));
|
|
||||||
}
|
|
||||||
let normalized = maybe_normalizing(&self.name, meta.strict_media_checks())?;
|
|
||||||
Ok(media_folder.join(normalized.as_ref()))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_equal_to(&self, meta: &Meta, self_zipped: &ZipFile, other_path: &Path) -> bool {
|
|
||||||
// TODO: checks hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147)
|
|
||||||
let self_size = if meta.media_list_is_hashmap() {
|
|
||||||
self_zipped.size()
|
|
||||||
} else {
|
|
||||||
self.size as u64
|
|
||||||
};
|
|
||||||
fs::metadata(other_path)
|
|
||||||
.map(|metadata| metadata.len() as u64 == self_size)
|
|
||||||
.unwrap_or_default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// - If strict is true, return an error if not normalized.
|
|
||||||
/// - If false, return the normalized version.
|
|
||||||
fn maybe_normalizing(name: &str, strict: bool) -> Result<Cow<str>> {
|
|
||||||
let normalized = normalize_filename(name);
|
|
||||||
if strict && matches!(normalized, Cow::Owned(_)) {
|
|
||||||
// exporting code should have checked this
|
|
||||||
Err(AnkiError::ImportError(ImportError::Corrupt))
|
|
||||||
} else {
|
|
||||||
Ok(normalized)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn extract_media_entries(
|
|
||||||
meta: &Meta,
|
|
||||||
archive: &mut ZipArchive<File>,
|
|
||||||
) -> Result<Vec<MediaEntry>> {
|
|
||||||
let mut file = archive.by_name("media")?;
|
|
||||||
let mut buf = Vec::new();
|
|
||||||
if meta.zstd_compressed() {
|
|
||||||
copy_decode(file, &mut buf)?;
|
|
||||||
} else {
|
|
||||||
io::copy(&mut file, &mut buf)?;
|
|
||||||
}
|
|
||||||
if meta.media_list_is_hashmap() {
|
|
||||||
let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
|
|
||||||
map.into_iter()
|
|
||||||
.map(|(idx_str, name)| {
|
|
||||||
let idx: u32 = idx_str.parse()?;
|
|
||||||
Ok(MediaEntry {
|
|
||||||
name,
|
|
||||||
size: 0,
|
|
||||||
sha1: vec![],
|
|
||||||
legacy_zip_filename: Some(idx),
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
} else {
|
|
||||||
let entries: MediaEntries = Message::decode(&*buf)?;
|
|
||||||
Ok(entries.entries)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn copy_collection(
|
fn copy_collection(
|
||||||
archive: &mut ZipArchive<File>,
|
archive: &mut ZipArchive<File>,
|
||||||
writer: &mut impl Write,
|
writer: &mut impl Write,
|
||||||
|
@ -233,14 +137,3 @@ fn copy_collection(
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod test {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn normalization() {
|
|
||||||
assert_eq!(&maybe_normalizing("con", false).unwrap(), "con_");
|
|
||||||
assert!(&maybe_normalizing("con", true).is_err());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
134
rslib/src/import_export/package/media.rs
Normal file
134
rslib/src/import_export/package/media.rs
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
borrow::Cow,
|
||||||
|
collections::HashMap,
|
||||||
|
fs::{self, File},
|
||||||
|
io,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
};
|
||||||
|
|
||||||
|
use prost::Message;
|
||||||
|
use zip::{read::ZipFile, ZipArchive};
|
||||||
|
use zstd::stream::copy_decode;
|
||||||
|
|
||||||
|
use super::{MediaEntries, MediaEntry, Meta};
|
||||||
|
use crate::{
|
||||||
|
error::ImportError, io::filename_is_safe, media::files::normalize_filename, prelude::*,
|
||||||
|
};
|
||||||
|
|
||||||
|
impl MediaEntry {
|
||||||
|
pub(super) fn new(
|
||||||
|
name: impl Into<String>,
|
||||||
|
size: impl TryInto<u32>,
|
||||||
|
sha1: impl Into<Vec<u8>>,
|
||||||
|
) -> Self {
|
||||||
|
MediaEntry {
|
||||||
|
name: name.into(),
|
||||||
|
size: size.try_into().unwrap_or_default(),
|
||||||
|
sha1: sha1.into(),
|
||||||
|
legacy_zip_filename: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn from_legacy(legacy_entry: (&str, String)) -> Result<Self> {
|
||||||
|
let idx: u32 = legacy_entry.0.parse()?;
|
||||||
|
let name = match safe_normalized_file_name(&legacy_entry.1)? {
|
||||||
|
Cow::Owned(new_name) => new_name,
|
||||||
|
Cow::Borrowed(_) => legacy_entry.1,
|
||||||
|
};
|
||||||
|
Ok(Self {
|
||||||
|
name,
|
||||||
|
size: 0,
|
||||||
|
sha1: vec![],
|
||||||
|
legacy_zip_filename: Some(idx),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn file_path(&self, media_folder: &Path) -> PathBuf {
|
||||||
|
media_folder.join(&self.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn is_equal_to(
|
||||||
|
&self,
|
||||||
|
meta: &Meta,
|
||||||
|
self_zipped: &ZipFile,
|
||||||
|
other_path: &Path,
|
||||||
|
) -> bool {
|
||||||
|
// TODO: check hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147)
|
||||||
|
let self_size = if meta.media_list_is_hashmap() {
|
||||||
|
self_zipped.size()
|
||||||
|
} else {
|
||||||
|
self.size as u64
|
||||||
|
};
|
||||||
|
fs::metadata(other_path)
|
||||||
|
.map(|metadata| metadata.len() as u64 == self_size)
|
||||||
|
.unwrap_or_default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn extract_media_entries(
|
||||||
|
meta: &Meta,
|
||||||
|
archive: &mut ZipArchive<File>,
|
||||||
|
) -> Result<Vec<MediaEntry>> {
|
||||||
|
let media_list_data = get_media_list_data(archive, meta)?;
|
||||||
|
if meta.media_list_is_hashmap() {
|
||||||
|
let map: HashMap<&str, String> = serde_json::from_slice(&media_list_data)?;
|
||||||
|
map.into_iter().map(MediaEntry::from_legacy).collect()
|
||||||
|
} else {
|
||||||
|
MediaEntries::decode_checked(&media_list_data).map(|m| m.entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn safe_normalized_file_name(name: &str) -> Result<Cow<str>> {
|
||||||
|
if !filename_is_safe(name) {
|
||||||
|
Err(AnkiError::ImportError(ImportError::Corrupt))
|
||||||
|
} else {
|
||||||
|
Ok(normalize_filename(name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_media_list_data(archive: &mut ZipArchive<File>, meta: &Meta) -> Result<Vec<u8>> {
|
||||||
|
let mut file = archive.by_name("media")?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
if meta.zstd_compressed() {
|
||||||
|
copy_decode(file, &mut buf)?;
|
||||||
|
} else {
|
||||||
|
io::copy(&mut file, &mut buf)?;
|
||||||
|
}
|
||||||
|
Ok(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MediaEntries {
|
||||||
|
fn decode_checked(buf: &[u8]) -> Result<Self> {
|
||||||
|
let entries: Self = Message::decode(buf)?;
|
||||||
|
for entry in &entries.entries {
|
||||||
|
if matches!(safe_normalized_file_name(&entry.name)?, Cow::Owned(_)) {
|
||||||
|
return Err(AnkiError::ImportError(ImportError::Corrupt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn normalization() {
        // A legacy entry has its name normalized while being deserialized.
        let legacy = MediaEntry::from_legacy(("1", "con".to_owned())).unwrap();
        assert_eq!(legacy.name, "con_");

        // A new-style entry is expected to be normalized already, so a name
        // that still needs normalizing must be rejected when decoding.
        let unnormalized = MediaEntries {
            entries: vec![MediaEntry::new("con", 0, Vec::new())],
        };
        let mut encoded = Vec::new();
        unnormalized.encode(&mut encoded).unwrap();
        assert!(MediaEntries::decode_checked(&encoded).is_err());
    }
}
|
|
@ -1,7 +1,13 @@
|
||||||
// Copyright: Ankitects Pty Ltd and contributors
|
// Copyright: Ankitects Pty Ltd and contributors
|
||||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
|
use std::{fs::File, io::Read};
|
||||||
|
|
||||||
|
use prost::Message;
|
||||||
|
use zip::ZipArchive;
|
||||||
|
|
||||||
pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta};
|
pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta};
|
||||||
|
use crate::{error::ImportError, prelude::*};
|
||||||
|
|
||||||
impl Version {
|
impl Version {
|
||||||
pub(super) fn collection_filename(&self) -> &'static str {
|
pub(super) fn collection_filename(&self) -> &'static str {
|
||||||
|
@ -27,6 +33,31 @@ impl Meta {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Extracts meta data from an archive and checks if its version is supported.
|
||||||
|
pub(super) fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
|
||||||
|
let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| {
|
||||||
|
let mut buf = vec![];
|
||||||
|
meta_file.read_to_end(&mut buf).ok()?;
|
||||||
|
Some(buf)
|
||||||
|
});
|
||||||
|
let meta = if let Some(bytes) = meta_bytes {
|
||||||
|
let meta: Meta = Message::decode(&*bytes)?;
|
||||||
|
if meta.version() == Version::Unknown {
|
||||||
|
return Err(AnkiError::ImportError(ImportError::TooNew));
|
||||||
|
}
|
||||||
|
meta
|
||||||
|
} else {
|
||||||
|
Meta {
|
||||||
|
version: if archive.by_name("collection.anki21").is_ok() {
|
||||||
|
Version::Legacy2
|
||||||
|
} else {
|
||||||
|
Version::Legacy1
|
||||||
|
} as i32,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(meta)
|
||||||
|
}
|
||||||
|
|
||||||
pub(super) fn collection_filename(&self) -> &'static str {
|
pub(super) fn collection_filename(&self) -> &'static str {
|
||||||
self.version().collection_filename()
|
self.version().collection_filename()
|
||||||
}
|
}
|
||||||
|
@ -39,10 +70,6 @@ impl Meta {
|
||||||
self.is_legacy()
|
self.is_legacy()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn strict_media_checks(&self) -> bool {
|
|
||||||
!self.is_legacy()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_legacy(&self) -> bool {
|
fn is_legacy(&self) -> bool {
|
||||||
matches!(self.version(), Version::Legacy1 | Version::Legacy2)
|
matches!(self.version(), Version::Legacy1 | Version::Legacy2)
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
mod apkg;
|
mod apkg;
|
||||||
mod colpkg;
|
mod colpkg;
|
||||||
|
mod media;
|
||||||
mod meta;
|
mod meta;
|
||||||
|
|
||||||
pub(crate) use apkg::NoteMeta;
|
pub(crate) use apkg::NoteMeta;
|
||||||
|
|
Loading…
Reference in a new issue