mirror of
https://github.com/ankitects/anki.git
synced 2025-09-21 07:22:23 -04:00
Refactor import-export/package
- Move media and meta code into appropriate modules.
- Normalize/check for normalization when deserializing media entries.
This commit is contained in:
parent
80dc3ae99e
commit
efde7c7acc
6 changed files with 172 additions and 128 deletions
|
@ -18,7 +18,7 @@ use crate::{
|
|||
collection::CollectionBuilder,
|
||||
import_export::{
|
||||
gather::ExchangeData,
|
||||
package::{colpkg::import::extract_media_entries, Meta},
|
||||
package::{media::extract_media_entries, Meta},
|
||||
},
|
||||
io::{atomic_rename, tempfile_in_parent_of},
|
||||
prelude::*,
|
||||
|
|
|
@ -306,17 +306,6 @@ fn write_media_files(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
impl MediaEntry {
|
||||
fn new(name: impl Into<String>, size: impl TryInto<u32>, sha1: impl Into<Vec<u8>>) -> Self {
|
||||
MediaEntry {
|
||||
name: name.into(),
|
||||
size: size.try_into().unwrap_or_default(),
|
||||
sha1: sha1.into(),
|
||||
legacy_zip_filename: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> {
|
||||
let filename = filename.to_str().ok_or_else(|| {
|
||||
AnkiError::IoError(format!(
|
||||
|
|
|
@ -2,57 +2,25 @@
|
|||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::HashMap,
|
||||
fs::{self, File},
|
||||
io::{self, Read, Write},
|
||||
fs::File,
|
||||
io::{self, Write},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use prost::Message;
|
||||
use zip::{read::ZipFile, ZipArchive};
|
||||
use zstd::{self, stream::copy_decode};
|
||||
|
||||
use super::super::Version;
|
||||
use crate::{
|
||||
collection::CollectionBuilder,
|
||||
error::ImportError,
|
||||
import_export::{
|
||||
package::{MediaEntries, MediaEntry, Meta},
|
||||
package::{media::extract_media_entries, MediaEntry, Meta},
|
||||
ImportProgress,
|
||||
},
|
||||
io::{atomic_rename, filename_is_safe, tempfile_in_parent_of},
|
||||
media::files::normalize_filename,
|
||||
io::{atomic_rename, tempfile_in_parent_of},
|
||||
prelude::*,
|
||||
};
|
||||
|
||||
impl Meta {
|
||||
/// Extracts meta data from an archive and checks if its version is supported.
|
||||
pub(super) fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
|
||||
let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| {
|
||||
let mut buf = vec![];
|
||||
meta_file.read_to_end(&mut buf).ok()?;
|
||||
Some(buf)
|
||||
});
|
||||
let meta = if let Some(bytes) = meta_bytes {
|
||||
let meta: Meta = Message::decode(&*bytes)?;
|
||||
if meta.version() == Version::Unknown {
|
||||
return Err(AnkiError::ImportError(ImportError::TooNew));
|
||||
}
|
||||
meta
|
||||
} else {
|
||||
Meta {
|
||||
version: if archive.by_name("collection.anki21").is_ok() {
|
||||
Version::Legacy2
|
||||
} else {
|
||||
Version::Legacy1
|
||||
} as i32,
|
||||
}
|
||||
};
|
||||
Ok(meta)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn import_colpkg(
|
||||
colpkg_path: &str,
|
||||
target_col_path: &str,
|
||||
|
@ -131,7 +99,7 @@ fn maybe_restore_media_file(
|
|||
entry: &MediaEntry,
|
||||
zip_file: &mut ZipFile,
|
||||
) -> Result<()> {
|
||||
let file_path = entry.safe_normalized_file_path(meta, media_folder)?;
|
||||
let file_path = entry.file_path(media_folder);
|
||||
let already_exists = entry.is_equal_to(meta, zip_file, &file_path);
|
||||
if !already_exists {
|
||||
restore_media_file(meta, zip_file, &file_path)?;
|
||||
|
@ -153,70 +121,6 @@ fn restore_media_file(meta: &Meta, zip_file: &mut ZipFile, path: &Path) -> Resul
|
|||
atomic_rename(tempfile, path, false)
|
||||
}
|
||||
|
||||
impl MediaEntry {
|
||||
fn safe_normalized_file_path(&self, meta: &Meta, media_folder: &Path) -> Result<PathBuf> {
|
||||
if !filename_is_safe(&self.name) {
|
||||
return Err(AnkiError::ImportError(ImportError::Corrupt));
|
||||
}
|
||||
let normalized = maybe_normalizing(&self.name, meta.strict_media_checks())?;
|
||||
Ok(media_folder.join(normalized.as_ref()))
|
||||
}
|
||||
|
||||
fn is_equal_to(&self, meta: &Meta, self_zipped: &ZipFile, other_path: &Path) -> bool {
|
||||
// TODO: checks hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147)
|
||||
let self_size = if meta.media_list_is_hashmap() {
|
||||
self_zipped.size()
|
||||
} else {
|
||||
self.size as u64
|
||||
};
|
||||
fs::metadata(other_path)
|
||||
.map(|metadata| metadata.len() as u64 == self_size)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// - If strict is true, return an error if not normalized.
|
||||
/// - If false, return the normalized version.
|
||||
fn maybe_normalizing(name: &str, strict: bool) -> Result<Cow<str>> {
|
||||
let normalized = normalize_filename(name);
|
||||
if strict && matches!(normalized, Cow::Owned(_)) {
|
||||
// exporting code should have checked this
|
||||
Err(AnkiError::ImportError(ImportError::Corrupt))
|
||||
} else {
|
||||
Ok(normalized)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn extract_media_entries(
|
||||
meta: &Meta,
|
||||
archive: &mut ZipArchive<File>,
|
||||
) -> Result<Vec<MediaEntry>> {
|
||||
let mut file = archive.by_name("media")?;
|
||||
let mut buf = Vec::new();
|
||||
if meta.zstd_compressed() {
|
||||
copy_decode(file, &mut buf)?;
|
||||
} else {
|
||||
io::copy(&mut file, &mut buf)?;
|
||||
}
|
||||
if meta.media_list_is_hashmap() {
|
||||
let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
|
||||
map.into_iter()
|
||||
.map(|(idx_str, name)| {
|
||||
let idx: u32 = idx_str.parse()?;
|
||||
Ok(MediaEntry {
|
||||
name,
|
||||
size: 0,
|
||||
sha1: vec![],
|
||||
legacy_zip_filename: Some(idx),
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
let entries: MediaEntries = Message::decode(&*buf)?;
|
||||
Ok(entries.entries)
|
||||
}
|
||||
}
|
||||
|
||||
fn copy_collection(
|
||||
archive: &mut ZipArchive<File>,
|
||||
writer: &mut impl Write,
|
||||
|
@ -233,14 +137,3 @@ fn copy_collection(
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn normalization() {
|
||||
assert_eq!(&maybe_normalizing("con", false).unwrap(), "con_");
|
||||
assert!(&maybe_normalizing("con", true).is_err());
|
||||
}
|
||||
}
|
||||
|
|
134
rslib/src/import_export/package/media.rs
Normal file
|
@ -0,0 +1,134 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::HashMap,
|
||||
fs::{self, File},
|
||||
io,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use prost::Message;
|
||||
use zip::{read::ZipFile, ZipArchive};
|
||||
use zstd::stream::copy_decode;
|
||||
|
||||
use super::{MediaEntries, MediaEntry, Meta};
|
||||
use crate::{
|
||||
error::ImportError, io::filename_is_safe, media::files::normalize_filename, prelude::*,
|
||||
};
|
||||
|
||||
impl MediaEntry {
|
||||
pub(super) fn new(
|
||||
name: impl Into<String>,
|
||||
size: impl TryInto<u32>,
|
||||
sha1: impl Into<Vec<u8>>,
|
||||
) -> Self {
|
||||
MediaEntry {
|
||||
name: name.into(),
|
||||
size: size.try_into().unwrap_or_default(),
|
||||
sha1: sha1.into(),
|
||||
legacy_zip_filename: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn from_legacy(legacy_entry: (&str, String)) -> Result<Self> {
|
||||
let idx: u32 = legacy_entry.0.parse()?;
|
||||
let name = match safe_normalized_file_name(&legacy_entry.1)? {
|
||||
Cow::Owned(new_name) => new_name,
|
||||
Cow::Borrowed(_) => legacy_entry.1,
|
||||
};
|
||||
Ok(Self {
|
||||
name,
|
||||
size: 0,
|
||||
sha1: vec![],
|
||||
legacy_zip_filename: Some(idx),
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn file_path(&self, media_folder: &Path) -> PathBuf {
|
||||
media_folder.join(&self.name)
|
||||
}
|
||||
|
||||
pub(super) fn is_equal_to(
|
||||
&self,
|
||||
meta: &Meta,
|
||||
self_zipped: &ZipFile,
|
||||
other_path: &Path,
|
||||
) -> bool {
|
||||
// TODO: check hashs (https://github.com/ankitects/anki/pull/1723#discussion_r829653147)
|
||||
let self_size = if meta.media_list_is_hashmap() {
|
||||
self_zipped.size()
|
||||
} else {
|
||||
self.size as u64
|
||||
};
|
||||
fs::metadata(other_path)
|
||||
.map(|metadata| metadata.len() as u64 == self_size)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn extract_media_entries(
|
||||
meta: &Meta,
|
||||
archive: &mut ZipArchive<File>,
|
||||
) -> Result<Vec<MediaEntry>> {
|
||||
let media_list_data = get_media_list_data(archive, meta)?;
|
||||
if meta.media_list_is_hashmap() {
|
||||
let map: HashMap<&str, String> = serde_json::from_slice(&media_list_data)?;
|
||||
map.into_iter().map(MediaEntry::from_legacy).collect()
|
||||
} else {
|
||||
MediaEntries::decode_checked(&media_list_data).map(|m| m.entries)
|
||||
}
|
||||
}
|
||||
|
||||
fn safe_normalized_file_name(name: &str) -> Result<Cow<str>> {
|
||||
if !filename_is_safe(name) {
|
||||
Err(AnkiError::ImportError(ImportError::Corrupt))
|
||||
} else {
|
||||
Ok(normalize_filename(name))
|
||||
}
|
||||
}
|
||||
|
||||
fn get_media_list_data(archive: &mut ZipArchive<File>, meta: &Meta) -> Result<Vec<u8>> {
|
||||
let mut file = archive.by_name("media")?;
|
||||
let mut buf = Vec::new();
|
||||
if meta.zstd_compressed() {
|
||||
copy_decode(file, &mut buf)?;
|
||||
} else {
|
||||
io::copy(&mut file, &mut buf)?;
|
||||
}
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
impl MediaEntries {
|
||||
fn decode_checked(buf: &[u8]) -> Result<Self> {
|
||||
let entries: Self = Message::decode(buf)?;
|
||||
for entry in &entries.entries {
|
||||
if matches!(safe_normalized_file_name(&entry.name)?, Cow::Owned(_)) {
|
||||
return Err(AnkiError::ImportError(ImportError::Corrupt));
|
||||
}
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn normalization() {
|
||||
// legacy entries get normalized on deserialisation
|
||||
let entry = MediaEntry::from_legacy(("1", "con".to_owned())).unwrap();
|
||||
assert_eq!(entry.name, "con_");
|
||||
|
||||
// new-style entries should have been normalized on export
|
||||
let mut entries = Vec::new();
|
||||
MediaEntries {
|
||||
entries: vec![MediaEntry::new("con", 0, Vec::new())],
|
||||
}
|
||||
.encode(&mut entries)
|
||||
.unwrap();
|
||||
assert!(MediaEntries::decode_checked(&entries).is_err());
|
||||
}
|
||||
}
|
|
@ -1,7 +1,13 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::{fs::File, io::Read};
|
||||
|
||||
use prost::Message;
|
||||
use zip::ZipArchive;
|
||||
|
||||
pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta};
|
||||
use crate::{error::ImportError, prelude::*};
|
||||
|
||||
impl Version {
|
||||
pub(super) fn collection_filename(&self) -> &'static str {
|
||||
|
@ -27,6 +33,31 @@ impl Meta {
|
|||
}
|
||||
}
|
||||
|
||||
/// Extracts meta data from an archive and checks if its version is supported.
|
||||
pub(super) fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
|
||||
let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| {
|
||||
let mut buf = vec![];
|
||||
meta_file.read_to_end(&mut buf).ok()?;
|
||||
Some(buf)
|
||||
});
|
||||
let meta = if let Some(bytes) = meta_bytes {
|
||||
let meta: Meta = Message::decode(&*bytes)?;
|
||||
if meta.version() == Version::Unknown {
|
||||
return Err(AnkiError::ImportError(ImportError::TooNew));
|
||||
}
|
||||
meta
|
||||
} else {
|
||||
Meta {
|
||||
version: if archive.by_name("collection.anki21").is_ok() {
|
||||
Version::Legacy2
|
||||
} else {
|
||||
Version::Legacy1
|
||||
} as i32,
|
||||
}
|
||||
};
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub(super) fn collection_filename(&self) -> &'static str {
|
||||
self.version().collection_filename()
|
||||
}
|
||||
|
@ -39,10 +70,6 @@ impl Meta {
|
|||
self.is_legacy()
|
||||
}
|
||||
|
||||
pub(super) fn strict_media_checks(&self) -> bool {
|
||||
!self.is_legacy()
|
||||
}
|
||||
|
||||
fn is_legacy(&self) -> bool {
|
||||
matches!(self.version(), Version::Legacy1 | Version::Legacy2)
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
mod apkg;
|
||||
mod colpkg;
|
||||
mod media;
|
||||
mod meta;
|
||||
|
||||
pub(crate) use apkg::NoteMeta;
|
||||
|
|
Loading…
Reference in a new issue