Refactor import-export/package

- Move media and meta code into appropriate modules.
- Normalize legacy media entry names, and check new-style names for normalization, when deserializing.
This commit is contained in:
RumovZ 2022-04-06 18:02:02 +02:00
parent 80dc3ae99e
commit efde7c7acc
6 changed files with 172 additions and 128 deletions

View file

@ -18,7 +18,7 @@ use crate::{
collection::CollectionBuilder, collection::CollectionBuilder,
import_export::{ import_export::{
gather::ExchangeData, gather::ExchangeData,
package::{colpkg::import::extract_media_entries, Meta}, package::{media::extract_media_entries, Meta},
}, },
io::{atomic_rename, tempfile_in_parent_of}, io::{atomic_rename, tempfile_in_parent_of},
prelude::*, prelude::*,

View file

@ -306,17 +306,6 @@ fn write_media_files(
Ok(()) Ok(())
} }
impl MediaEntry {
/// Convenience constructor for a new-style media entry.
/// A `size` that does not fit into `u32` silently becomes 0
/// (`unwrap_or_default` on the failed conversion).
fn new(name: impl Into<String>, size: impl TryInto<u32>, sha1: impl Into<Vec<u8>>) -> Self {
MediaEntry {
name: name.into(),
size: size.try_into().unwrap_or_default(),
sha1: sha1.into(),
// only populated for entries read from legacy packages
legacy_zip_filename: None,
}
}
}
fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> { fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> {
let filename = filename.to_str().ok_or_else(|| { let filename = filename.to_str().ok_or_else(|| {
AnkiError::IoError(format!( AnkiError::IoError(format!(

View file

@ -2,57 +2,25 @@
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{ use std::{
borrow::Cow, fs::File,
collections::HashMap, io::{self, Write},
fs::{self, File},
io::{self, Read, Write},
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use prost::Message;
use zip::{read::ZipFile, ZipArchive}; use zip::{read::ZipFile, ZipArchive};
use zstd::{self, stream::copy_decode}; use zstd::{self, stream::copy_decode};
use super::super::Version;
use crate::{ use crate::{
collection::CollectionBuilder, collection::CollectionBuilder,
error::ImportError, error::ImportError,
import_export::{ import_export::{
package::{MediaEntries, MediaEntry, Meta}, package::{media::extract_media_entries, MediaEntry, Meta},
ImportProgress, ImportProgress,
}, },
io::{atomic_rename, filename_is_safe, tempfile_in_parent_of}, io::{atomic_rename, tempfile_in_parent_of},
media::files::normalize_filename,
prelude::*, prelude::*,
}; };
impl Meta {
/// Extracts meta data from an archive and checks if its version is supported.
pub(super) fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
// A missing or unreadable "meta" file is not an error here; it
// indicates a legacy package (handled below).
let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| {
let mut buf = vec![];
meta_file.read_to_end(&mut buf).ok()?;
Some(buf)
});
let meta = if let Some(bytes) = meta_bytes {
// "meta" is a protobuf-encoded PackageMetadata message.
let meta: Meta = Message::decode(&*bytes)?;
// An unrecognised version means the package was produced by a
// newer client than this one.
if meta.version() == Version::Unknown {
return Err(AnkiError::ImportError(ImportError::TooNew));
}
meta
} else {
// No meta file: legacy package. Distinguish the two legacy
// flavours by which collection file name is present.
Meta {
version: if archive.by_name("collection.anki21").is_ok() {
Version::Legacy2
} else {
Version::Legacy1
} as i32,
}
};
Ok(meta)
}
}
pub fn import_colpkg( pub fn import_colpkg(
colpkg_path: &str, colpkg_path: &str,
target_col_path: &str, target_col_path: &str,
@ -131,7 +99,7 @@ fn maybe_restore_media_file(
entry: &MediaEntry, entry: &MediaEntry,
zip_file: &mut ZipFile, zip_file: &mut ZipFile,
) -> Result<()> { ) -> Result<()> {
let file_path = entry.safe_normalized_file_path(meta, media_folder)?; let file_path = entry.file_path(media_folder);
let already_exists = entry.is_equal_to(meta, zip_file, &file_path); let already_exists = entry.is_equal_to(meta, zip_file, &file_path);
if !already_exists { if !already_exists {
restore_media_file(meta, zip_file, &file_path)?; restore_media_file(meta, zip_file, &file_path)?;
@ -153,70 +121,6 @@ fn restore_media_file(meta: &Meta, zip_file: &mut ZipFile, path: &Path) -> Resul
atomic_rename(tempfile, path, false) atomic_rename(tempfile, path, false)
} }
impl MediaEntry {
/// Returns the target path for this entry inside `media_folder`,
/// rejecting unsafe names (e.g. path traversal) and applying the
/// normalization policy dictated by `meta`.
fn safe_normalized_file_path(&self, meta: &Meta, media_folder: &Path) -> Result<PathBuf> {
if !filename_is_safe(&self.name) {
return Err(AnkiError::ImportError(ImportError::Corrupt));
}
let normalized = maybe_normalizing(&self.name, meta.strict_media_checks())?;
Ok(media_folder.join(normalized.as_ref()))
}
/// True if the file at `other_path` matches this entry by size.
/// A missing/unreadable file counts as not equal.
fn is_equal_to(&self, meta: &Meta, self_zipped: &ZipFile, other_path: &Path) -> bool {
// TODO: also compare hashes (https://github.com/ankitects/anki/pull/1723#discussion_r829653147)
// Legacy packages don't store sizes in the media list, so fall
// back to the zipped file's size.
let self_size = if meta.media_list_is_hashmap() {
self_zipped.size()
} else {
self.size as u64
};
fs::metadata(other_path)
.map(|metadata| metadata.len() as u64 == self_size)
.unwrap_or_default()
}
}
/// Normalization check for a media file name.
/// - If strict is true, return an error if not normalized.
/// - If false, return the normalized version.
fn maybe_normalizing(name: &str, strict: bool) -> Result<Cow<str>> {
let normalized = normalize_filename(name);
// `Cow::Owned` means `normalize_filename` had to change the name.
if strict && matches!(normalized, Cow::Owned(_)) {
// exporting code should have checked this
Err(AnkiError::ImportError(ImportError::Corrupt))
} else {
Ok(normalized)
}
}
/// Reads and decodes the "media" list from the archive.
/// Legacy packages store a JSON map of zip index -> file name;
/// newer packages store a protobuf `MediaEntries` message, optionally
/// zstd-compressed.
pub(crate) fn extract_media_entries(
meta: &Meta,
archive: &mut ZipArchive<File>,
) -> Result<Vec<MediaEntry>> {
let mut file = archive.by_name("media")?;
let mut buf = Vec::new();
if meta.zstd_compressed() {
copy_decode(file, &mut buf)?;
} else {
io::copy(&mut file, &mut buf)?;
}
if meta.media_list_is_hashmap() {
// Legacy JSON map; convert each (index, name) pair to an entry.
let map: HashMap<&str, String> = serde_json::from_slice(&buf)?;
map.into_iter()
.map(|(idx_str, name)| {
let idx: u32 = idx_str.parse()?;
Ok(MediaEntry {
// legacy lists carry no size or checksum
name,
size: 0,
sha1: vec![],
legacy_zip_filename: Some(idx),
})
})
.collect()
} else {
let entries: MediaEntries = Message::decode(&*buf)?;
Ok(entries.entries)
}
}
fn copy_collection( fn copy_collection(
archive: &mut ZipArchive<File>, archive: &mut ZipArchive<File>,
writer: &mut impl Write, writer: &mut impl Write,
@ -233,14 +137,3 @@ fn copy_collection(
Ok(()) Ok(())
} }
#[cfg(test)]
mod test {
use super::*;
#[test]
fn normalization() {
// non-strict mode rewrites the name ("con" -> "con_");
// strict mode rejects any name that would need rewriting
assert_eq!(&maybe_normalizing("con", false).unwrap(), "con_");
assert!(&maybe_normalizing("con", true).is_err());
}
}

View file

@ -0,0 +1,134 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
borrow::Cow,
collections::HashMap,
fs::{self, File},
io,
path::{Path, PathBuf},
};
use prost::Message;
use zip::{read::ZipFile, ZipArchive};
use zstd::stream::copy_decode;
use super::{MediaEntries, MediaEntry, Meta};
use crate::{
error::ImportError, io::filename_is_safe, media::files::normalize_filename, prelude::*,
};
impl MediaEntry {
/// Convenience constructor for a new-style media entry.
/// A `size` that does not fit into `u32` silently becomes 0.
pub(super) fn new(
name: impl Into<String>,
size: impl TryInto<u32>,
sha1: impl Into<Vec<u8>>,
) -> Self {
MediaEntry {
name: name.into(),
size: size.try_into().unwrap_or_default(),
sha1: sha1.into(),
// only populated by `from_legacy`
legacy_zip_filename: None,
}
}
/// Builds an entry from a legacy `(zip index, file name)` pair,
/// normalizing the name if needed. Fails if the index doesn't parse
/// as `u32` or the name is unsafe.
pub(super) fn from_legacy(legacy_entry: (&str, String)) -> Result<Self> {
let idx: u32 = legacy_entry.0.parse()?;
let name = match safe_normalized_file_name(&legacy_entry.1)? {
Cow::Owned(new_name) => new_name,
// name was already normalized; reuse the owned original
Cow::Borrowed(_) => legacy_entry.1,
};
Ok(Self {
name,
// legacy lists carry no size or checksum
size: 0,
sha1: vec![],
legacy_zip_filename: Some(idx),
})
}
/// Path this entry should occupy inside `media_folder`.
pub(super) fn file_path(&self, media_folder: &Path) -> PathBuf {
media_folder.join(&self.name)
}
/// True if the file at `other_path` matches this entry by size.
/// A missing/unreadable file counts as not equal.
pub(super) fn is_equal_to(
&self,
meta: &Meta,
self_zipped: &ZipFile,
other_path: &Path,
) -> bool {
// TODO: also compare hashes (https://github.com/ankitects/anki/pull/1723#discussion_r829653147)
// Legacy packages don't store sizes in the media list, so fall
// back to the zipped file's size.
let self_size = if meta.media_list_is_hashmap() {
self_zipped.size()
} else {
self.size as u64
};
fs::metadata(other_path)
.map(|metadata| metadata.len() as u64 == self_size)
.unwrap_or_default()
}
}
/// Reads and decodes the "media" list from the archive.
/// Legacy packages store a JSON map of zip index -> file name;
/// newer packages store a protobuf `MediaEntries` message, which is
/// validated (safe, normalized names) during decoding.
pub(super) fn extract_media_entries(
meta: &Meta,
archive: &mut ZipArchive<File>,
) -> Result<Vec<MediaEntry>> {
let media_list_data = get_media_list_data(archive, meta)?;
if meta.media_list_is_hashmap() {
let map: HashMap<&str, String> = serde_json::from_slice(&media_list_data)?;
map.into_iter().map(MediaEntry::from_legacy).collect()
} else {
MediaEntries::decode_checked(&media_list_data).map(|m| m.entries)
}
}
/// Rejects unsafe names (e.g. path traversal) with a Corrupt error;
/// otherwise returns the normalized name (`Cow::Owned` iff
/// normalization changed it).
fn safe_normalized_file_name(name: &str) -> Result<Cow<str>> {
if !filename_is_safe(name) {
Err(AnkiError::ImportError(ImportError::Corrupt))
} else {
Ok(normalize_filename(name))
}
}
/// Returns the raw bytes of the archive's "media" member, decompressing
/// with zstd when the package metadata says it is compressed.
fn get_media_list_data(archive: &mut ZipArchive<File>, meta: &Meta) -> Result<Vec<u8>> {
let mut file = archive.by_name("media")?;
let mut buf = Vec::new();
if meta.zstd_compressed() {
copy_decode(file, &mut buf)?;
} else {
io::copy(&mut file, &mut buf)?;
}
Ok(buf)
}
impl MediaEntries {
/// Decodes the protobuf media list, then verifies every entry name is
/// safe and already normalized; the exporter is expected to have
/// normalized names, so a non-normalized one marks the package corrupt.
fn decode_checked(buf: &[u8]) -> Result<Self> {
let entries: Self = Message::decode(buf)?;
for entry in &entries.entries {
// `Cow::Owned` means normalization would change the name
if matches!(safe_normalized_file_name(&entry.name)?, Cow::Owned(_)) {
return Err(AnkiError::ImportError(ImportError::Corrupt));
}
}
Ok(entries)
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn normalization() {
// legacy entries get normalized on deserialization
// ("con" is rewritten to "con_" by normalize_filename)
let entry = MediaEntry::from_legacy(("1", "con".to_owned())).unwrap();
assert_eq!(entry.name, "con_");
// new-style entries should have been normalized on export, so
// decoding a non-normalized name must fail
let mut entries = Vec::new();
MediaEntries {
entries: vec![MediaEntry::new("con", 0, Vec::new())],
}
.encode(&mut entries)
.unwrap();
assert!(MediaEntries::decode_checked(&entries).is_err());
}
}

View file

@ -1,7 +1,13 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{fs::File, io::Read};
use prost::Message;
use zip::ZipArchive;
pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta}; pub(super) use crate::backend_proto::{package_metadata::Version, PackageMetadata as Meta};
use crate::{error::ImportError, prelude::*};
impl Version { impl Version {
pub(super) fn collection_filename(&self) -> &'static str { pub(super) fn collection_filename(&self) -> &'static str {
@ -27,6 +33,31 @@ impl Meta {
} }
} }
/// Extracts meta data from an archive and checks if its version is supported.
pub(super) fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
// A missing or unreadable "meta" file is not an error here; it
// indicates a legacy package (handled below).
let meta_bytes = archive.by_name("meta").ok().and_then(|mut meta_file| {
let mut buf = vec![];
meta_file.read_to_end(&mut buf).ok()?;
Some(buf)
});
let meta = if let Some(bytes) = meta_bytes {
// "meta" is a protobuf-encoded PackageMetadata message.
let meta: Meta = Message::decode(&*bytes)?;
// An unrecognised version means the package was produced by a
// newer client than this one.
if meta.version() == Version::Unknown {
return Err(AnkiError::ImportError(ImportError::TooNew));
}
meta
} else {
// No meta file: legacy package. Distinguish the two legacy
// flavours by which collection file name is present.
Meta {
version: if archive.by_name("collection.anki21").is_ok() {
Version::Legacy2
} else {
Version::Legacy1
} as i32,
}
};
Ok(meta)
}
pub(super) fn collection_filename(&self) -> &'static str { pub(super) fn collection_filename(&self) -> &'static str {
self.version().collection_filename() self.version().collection_filename()
} }
@ -39,10 +70,6 @@ impl Meta {
self.is_legacy() self.is_legacy()
} }
pub(super) fn strict_media_checks(&self) -> bool {
!self.is_legacy()
}
fn is_legacy(&self) -> bool { fn is_legacy(&self) -> bool {
matches!(self.version(), Version::Legacy1 | Version::Legacy2) matches!(self.version(), Version::Legacy1 | Version::Legacy2)
} }

View file

@ -3,6 +3,7 @@
mod apkg; mod apkg;
mod colpkg; mod colpkg;
mod media;
mod meta; mod meta;
pub(crate) use apkg::NoteMeta; pub(crate) use apkg::NoteMeta;