Start on apkg importing on backend

This commit is contained in:
RumovZ 2022-04-05 18:15:51 +02:00
parent fce797cb47
commit 6836da07ec
7 changed files with 389 additions and 2 deletions

View file

@ -0,0 +1,322 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
collections::{HashMap, HashSet},
fs::{self, File},
io::{self, Write},
mem,
path::{Path, PathBuf},
sync::Arc,
};
use sha1::Sha1;
use tempfile::NamedTempFile;
use zip::ZipArchive;
use crate::{
collection::CollectionBuilder,
import_export::{
gather::ExchangeData,
package::{colpkg::import::extract_media_entries, Meta},
},
io::{atomic_rename, tempfile_in_parent_of},
prelude::*,
text::replace_media_refs,
};
/// Working state for importing the notes of an .apkg package into a
/// target collection.
#[derive(Debug)]
struct Context {
    /// Zip archive of the source package; media is read from it lazily.
    archive: ZipArchive<File>,
    /// Note guid → metadata of the notes already in the target collection.
    guid_map: HashMap<String, NoteMeta>,
    /// Incoming notetype id → fresh id assigned in the target collection
    /// (for notetypes whose schema hash didn't match the existing one).
    remapped_notetypes: HashMap<NotetypeId, NotetypeId>,
    /// All note ids already taken in the target collection.
    existing_notes: HashSet<NoteId>,
    /// All notetype ids already taken in the target collection.
    existing_notetypes: HashSet<NotetypeId>,
    /// Data gathered from the source package; rewritten in place during
    /// preparation, then inserted into the target collection.
    data: ExchangeData,
    /// Update sequence number stamped onto imported objects.
    usn: Usn,
    /// Real media file name → name of the zip entry holding its data.
    media_map: HashMap<String, String>,
    /// Media folder of the target collection.
    target_media_folder: PathBuf,
    /// Guids of incoming notes that clash with existing notes and are
    /// skipped rather than merged.
    conflicting_notes: HashSet<String>,
}
/// The subset of a note's data needed to decide how an incoming note with
/// the same guid should be handled (update, skip, or conflict).
#[derive(Debug, Clone, Copy)]
pub(crate) struct NoteMeta {
    id: NoteId,
    mtime: TimestampSecs,
    notetype_id: NotetypeId,
}
impl NoteMeta {
    /// Builds a [NoteMeta] from its parts.
    pub(crate) fn new(id: NoteId, mtime: TimestampSecs, notetype_id: NotetypeId) -> Self {
        Self {
            id,
            mtime,
            notetype_id,
        }
    }

    /// Extracts the relevant metadata of an existing note.
    fn from_note(note: &Note) -> Self {
        Self {
            id: note.id,
            mtime: note.mtime,
            notetype_id: note.notetype_id,
        }
    }
}
impl Collection {
    /// Imports the notes matched by `search` from the .apkg file at `path`
    /// into this collection, optionally including scheduling information.
    pub fn import_apkg(
        &mut self,
        path: &str,
        search: impl TryIntoSearch,
        with_scheduling: bool,
    ) -> Result<()> {
        let archive = ZipArchive::new(File::open(path)?)?;
        let mut ctx = Context::new(archive, self, search, with_scheduling)?;
        ctx.prepare_notetypes(self)?;
        ctx.prepare_notes()?;
        self.insert_data(&ctx.data)
    }
}
/// Maps the real media file names referenced by the package to the names
/// of the zip entries that contain their data.
fn build_media_map(archive: &mut ZipArchive<File>) -> Result<HashMap<String, String>> {
    Ok(extract_media_entries(&Meta::new_legacy(), archive)?
        .into_iter()
        .map(|entry| {
            let zip_name = entry
                .legacy_zip_filename
                // extracting with a legacy meta is expected to always yield
                // a zip filename; state the invariant instead of a bare unwrap
                .expect("legacy media entries have zip filenames")
                .to_string();
            (entry.name, zip_name)
        })
        .collect())
}
impl ExchangeData {
    /// Extracts the embedded collection from `archive` into a temporary
    /// file, opens it, and gathers the data matched by `search` from it.
    fn gather_from_archive(
        archive: &mut ZipArchive<File>,
        search: impl TryIntoSearch,
        with_scheduling: bool,
    ) -> Result<Self> {
        let mut collection_file = archive.by_name(Meta::new_legacy().collection_filename())?;
        let mut temp_col_file = NamedTempFile::new()?;
        io::copy(&mut collection_file, &mut temp_col_file)?;
        let mut source_col = CollectionBuilder::new(temp_col_file.path()).build()?;
        let mut data = Self::default();
        data.gather_data(&mut source_col, search, with_scheduling)?;
        Ok(data)
    }
}
impl Context {
    /// Gathers the requested data from the source archive and snapshots the
    /// state of the target collection needed for merging.
    fn new(
        mut archive: ZipArchive<File>,
        target_col: &mut Collection,
        search: impl TryIntoSearch,
        with_scheduling: bool,
    ) -> Result<Self> {
        let data = ExchangeData::gather_from_archive(&mut archive, search, with_scheduling)?;
        let media_map = build_media_map(&mut archive)?;
        Ok(Self {
            archive,
            data,
            guid_map: target_col.storage.note_guid_map()?,
            existing_notes: target_col.storage.get_all_note_ids()?,
            existing_notetypes: target_col.storage.get_all_notetype_ids()?,
            media_map,
            target_media_folder: target_col.media_folder.clone(),
            usn: target_col.usn()?,
            conflicting_notes: HashSet::new(),
            remapped_notetypes: HashMap::new(),
        })
    }

    /// Merges or remaps every incoming notetype against the target
    /// collection's, draining `self.data.notetypes` and refilling it with
    /// the notetypes that should actually be inserted.
    fn prepare_notetypes(&mut self, target_col: &mut Collection) -> Result<()> {
        for notetype in mem::take(&mut self.data.notetypes) {
            if let Some(existing) = target_col.get_notetype(notetype.id)? {
                self.merge_or_remap_notetype(notetype, existing)?;
            } else {
                self.add_notetype(notetype);
            }
        }
        Ok(())
    }

    /// Queues `notetype` for insertion, stamping it with the target usn and
    /// reserving its id.
    fn add_notetype(&mut self, mut notetype: Notetype) {
        self.existing_notetypes.insert(notetype.id);
        notetype.usn = self.usn;
        self.data.notetypes.push(notetype);
    }

    /// If the schemas match, the newer of the two notetypes wins; otherwise
    /// the incoming notetype is added under a fresh id.
    fn merge_or_remap_notetype(
        &mut self,
        incoming: Notetype,
        existing: Arc<Notetype>,
    ) -> Result<()> {
        if incoming.schema_hash() == existing.schema_hash() {
            self.add_notetype_if_newer(incoming, existing);
        } else {
            self.add_notetype_with_new_id(incoming)?;
        }
        Ok(())
    }

    /// Assigns `notetype` a fresh id, recording the mapping so its notes can
    /// be rewritten later, and queues it for insertion.
    fn add_notetype_with_new_id(&mut self, mut notetype: Notetype) -> Result<()> {
        let new_id = self.next_available_notetype_id();
        self.remapped_notetypes.insert(notetype.id, new_id);
        notetype.id = new_id;
        self.add_notetype(notetype);
        Ok(())
    }

    /// Returns a timestamp-based notetype id that is not yet taken in the
    /// target collection.
    fn next_available_notetype_id(&self) -> NotetypeId {
        let mut next_id = NotetypeId(TimestampMillis::now().0);
        while self.existing_notetypes.contains(&next_id) {
            next_id.0 += 1;
        }
        next_id
    }

    /// Queues `incoming` only if it was modified more recently than the
    /// schema-identical `existing` notetype.
    fn add_notetype_if_newer(&mut self, incoming: Notetype, existing: Arc<Notetype>) {
        if incoming.mtime_secs > existing.mtime_secs {
            self.add_notetype(incoming);
        }
    }

    /// Decides for every incoming note whether it is added as new, updates
    /// an existing note, or is skipped.
    fn prepare_notes(&mut self) -> Result<()> {
        for mut note in mem::take(&mut self.data.notes) {
            if let Some(notetype_id) = self.remapped_notetypes.get(&note.notetype_id) {
                if self.guid_map.contains_key(&note.guid) {
                    // The note exists in the target, but its notetype
                    // couldn't be merged; record the conflict and skip it
                    // rather than updating across incompatible notetypes
                    // (mirrors the conflict branch in prepare_existing_note).
                    self.conflicting_notes.insert(note.guid);
                    // TODO: Log ignore
                } else {
                    note.notetype_id = *notetype_id;
                    self.prepare_new_note(note)?;
                }
            } else if let Some(&meta) = self.guid_map.get(&note.guid) {
                self.prepare_existing_note(note, meta)?;
            } else {
                self.prepare_new_note(note)?;
            }
        }
        Ok(())
    }

    /// Rewrites media references, stamps the usn, and queues `note` for
    /// insertion.
    fn add_prepared_note(&mut self, mut note: Note) -> Result<()> {
        self.munge_media(&mut note)?;
        note.usn = self.usn;
        self.data.notes.push(note);
        Ok(())
    }

    /// Assigns the note a free id and registers it as existing before
    /// queueing it.
    fn prepare_new_note(&mut self, mut note: Note) -> Result<()> {
        self.to_next_available_note_id(&mut note.id);
        self.existing_notes.insert(note.id);
        self.guid_map
            .insert(note.guid.clone(), NoteMeta::from_note(&note));
        self.add_prepared_note(note)
        // TODO: Log add
    }

    /// Updates the matching target note if the incoming one is newer and
    /// shares its notetype; otherwise records a conflict or drops the
    /// duplicate.
    fn prepare_existing_note(&mut self, mut note: Note, meta: NoteMeta) -> Result<()> {
        if meta.mtime < note.mtime {
            if meta.notetype_id == note.notetype_id {
                note.id = meta.id;
                self.add_prepared_note(note)?;
                // TODO: Log update
            } else {
                self.conflicting_notes.insert(note.guid);
                // TODO: Log ignore
            }
        } else {
            // TODO: Log duplicate
        }
        Ok(())
    }

    /// Rewrites every media reference in the note's fields that had to be
    /// renamed while merging media files.
    fn munge_media(&mut self, note: &mut Note) -> Result<()> {
        let notetype_id = note.notetype_id;
        for field in note.fields_mut() {
            if let Some(new_field) = self.replace_media_refs_fallible(field, notetype_id)? {
                *field = new_field;
            };
        }
        Ok(())
    }

    /// Like [replace_media_refs], but propagates errors from the (infallible
    /// by signature) replacer closure via an out-of-band slot.
    fn replace_media_refs_fallible(
        &mut self,
        field: &mut String,
        notetype_id: NotetypeId,
    ) -> Result<Option<String>> {
        let mut res = Ok(());
        let out = replace_media_refs(field, |name| {
            if res.is_err() {
                // an earlier replacement failed; stop doing work
                None
            } else {
                self.merge_media_maybe_renaming(name, notetype_id)
                    .unwrap_or_else(|err| {
                        res = Err(err);
                        None
                    })
            }
        });
        res.map(|_| out)
    }

    /// If the referenced file is included in the package, merges it into the
    /// target media folder, falling back to a notetype-specific alternate
    /// name when an existing file has different content. Returns the new
    /// reference name if it changed.
    fn merge_media_maybe_renaming(
        &mut self,
        name: &str,
        notetype: NotetypeId,
    ) -> Result<Option<String>> {
        Ok(if let Some(zip_name) = self.media_map.get(name) {
            let alternate_name = alternate_media_name(name, notetype);
            let alternate_path = self.target_media_folder.join(&alternate_name);
            if alternate_path.exists() {
                // a previous conflict already produced the alternate file
                Some(alternate_name)
            } else {
                let mut data = Vec::new();
                io::copy(&mut self.archive.by_name(zip_name)?, &mut data)?;
                let target_path = self.target_media_folder.join(name);
                if !target_path.exists() {
                    write_data_atomically(&data, &target_path)?;
                    None
                } else if data == fs::read(target_path)? {
                    // identical file already present; keep the reference
                    None
                } else {
                    write_data_atomically(&data, &alternate_path)?;
                    Some(alternate_name)
                }
            }
        } else {
            // file not included in the package; leave the reference alone
            None
        })
    }

    /// Bumps `note_id` until it is free in the target collection.
    // NOTE(review): the step of 999 looks intentional (spreads remapped ids
    // away from timestamp-adjacent ones) — confirm against legacy importer.
    fn to_next_available_note_id(&self, note_id: &mut NoteId) {
        while self.existing_notes.contains(note_id) {
            note_id.0 += 999;
        }
    }
}
/// Writes `data` to `path` via a temporary file in the same directory, so
/// readers never observe a partially written file.
fn write_data_atomically(data: &[u8], path: &Path) -> Result<()> {
    let mut temp_file = tempfile_in_parent_of(path)?;
    temp_file.write_all(data)?;
    atomic_rename(temp_file, path, false)
}
/// Returns `name` with `_{notetype_id}` spliced in before the last file
/// extension, or appended at the end if there is no extension.
fn alternate_media_name(name: &str, notetype_id: NotetypeId) -> String {
    match name.rsplit_once('.') {
        Some((stem, extension)) => format!("{stem}_{notetype_id}.{extension}"),
        None => format!("{name}_{notetype_id}"),
    }
}
impl Notetype {
    /// Hash over the field and template names, used to decide whether two
    /// notetypes sharing an id still share a schema.
    fn schema_hash(&self) -> [u8; 20] {
        let mut hasher = Sha1::new();
        let field_names = self.fields.iter().map(|field| &field.name);
        let template_names = self.templates.iter().map(|template| &template.name);
        for name in field_names.chain(template_names) {
            hasher.update(name.as_bytes());
        }
        hasher.digest().bytes()
    }
}

View file

@ -2,3 +2,6 @@
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
mod export; mod export;
mod import;
pub(crate) use import::NoteMeta;

View file

@ -187,7 +187,10 @@ fn maybe_normalizing(name: &str, strict: bool) -> Result<Cow<str>> {
} }
} }
fn extract_media_entries(meta: &Meta, archive: &mut ZipArchive<File>) -> Result<Vec<MediaEntry>> { pub(crate) fn extract_media_entries(
meta: &Meta,
archive: &mut ZipArchive<File>,
) -> Result<Vec<MediaEntry>> {
let mut file = archive.by_name("media")?; let mut file = archive.by_name("media")?;
let mut buf = Vec::new(); let mut buf = Vec::new();
if meta.zstd_compressed() { if meta.zstd_compressed() {

View file

@ -5,6 +5,7 @@ mod apkg;
mod colpkg; mod colpkg;
mod meta; mod meta;
pub(crate) use apkg::NoteMeta;
pub(crate) use colpkg::export::export_colpkg_from_data; pub(crate) use colpkg::export::export_colpkg_from_data;
pub use colpkg::import::import_colpkg; pub use colpkg::import::import_colpkg;
pub(self) use meta::{Meta, Version}; pub(self) use meta::{Meta, Version};

View file

@ -1,12 +1,13 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashSet; use std::collections::{HashMap, HashSet};
use rusqlite::{params, Row}; use rusqlite::{params, Row};
use crate::{ use crate::{
error::Result, error::Result,
import_export::package::NoteMeta,
notes::{Note, NoteId, NoteTags}, notes::{Note, NoteId, NoteTags},
notetype::NotetypeId, notetype::NotetypeId,
tags::{join_tags, split_tags}, tags::{join_tags, split_tags},
@ -41,6 +42,13 @@ impl super::SqliteStorage {
.transpose() .transpose()
} }
/// Returns the ids of all notes in the collection.
pub fn get_all_note_ids(&self) -> Result<HashSet<NoteId>> {
    self.db
        .prepare("SELECT id FROM notes")?
        .query_and_then([], |row| Ok(row.get(0)?))?
        .collect()
}
/// If fields have been modified, caller must call note.prepare_for_update() prior to calling this. /// If fields have been modified, caller must call note.prepare_for_update() prior to calling this.
pub(crate) fn update_note(&self, note: &Note) -> Result<()> { pub(crate) fn update_note(&self, note: &Note) -> Result<()> {
assert!(note.id.0 != 0); assert!(note.id.0 != 0);
@ -269,6 +277,13 @@ impl super::SqliteStorage {
Ok(()) Ok(())
} }
/// Returns a map from note guid to the (id, modification time, notetype id)
/// of every note in the collection.
pub(crate) fn note_guid_map(&mut self) -> Result<HashMap<String, NoteMeta>> {
    self.db
        .prepare("SELECT guid, id, mod, mid FROM notes")?
        .query_and_then([], row_to_note_meta)?
        .collect()
}
} }
fn row_to_note(row: &Row) -> Result<Note> { fn row_to_note(row: &Row) -> Result<Note> {
@ -295,3 +310,10 @@ fn row_to_note_tags(row: &Row) -> Result<NoteTags> {
tags: row.get(3)?, tags: row.get(3)?,
}) })
} }
/// Converts a `(guid, id, mod, mid)` row into a map entry for
/// [SqliteStorage::note_guid_map].
fn row_to_note_meta(row: &Row) -> Result<(String, NoteMeta)> {
    let guid: String = row.get(0)?;
    let meta = NoteMeta::new(row.get(1)?, row.get(2)?, row.get(3)?);
    Ok((guid, meta))
}

View file

@ -123,6 +123,13 @@ impl SqliteStorage {
.collect() .collect()
} }
/// Returns the ids of all notetypes in the collection.
pub fn get_all_notetype_ids(&self) -> Result<HashSet<NotetypeId>> {
    self.db
        .prepare("SELECT id FROM notetypes")?
        .query_and_then([], |row| Ok(row.get(0)?))?
        .collect()
}
/// Returns list of (id, name, use_count) /// Returns list of (id, name, use_count)
pub fn get_notetype_use_counts(&self) -> Result<Vec<(NotetypeId, String, u32)>> { pub fn get_notetype_use_counts(&self) -> Result<Vec<(NotetypeId, String, u32)>> {
self.db self.db

View file

@ -246,6 +246,35 @@ pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
out out
} }
/// Calls `replacer` for every media reference in `text`, and optionally
/// replaces it with something else. Returns [None] if no replacement was
/// made.
pub(crate) fn replace_media_refs(
    text: &str,
    mut replacer: impl FnMut(&str) -> Option<String>,
) -> Option<String> {
    // Rewrites a single regex match, substituting the captured file name
    // when the caller supplies a replacement.
    let mut rewrite_match = |caps: &Captures| {
        let full_match = caps.get(0).unwrap().as_str();
        let file_name = caps.iter().skip(1).find_map(|g| g).unwrap().as_str();
        match replacer(file_name) {
            Some(new_name) => full_match.replace(file_name, &new_name),
            None => full_match.to_owned(),
        }
    };
    let mut result = Cow::from(text);
    if let Cow::Owned(replaced) = HTML_MEDIA_TAGS.replace_all(&result, &mut rewrite_match) {
        result = replaced.into();
    }
    if let Cow::Owned(replaced) = AV_TAGS.replace_all(&result, &mut rewrite_match) {
        result = replaced.into();
    }
    match result {
        // only an owned value indicates at least one replacement happened
        Cow::Owned(changed) => Some(changed),
        Cow::Borrowed(_) => None,
    }
}
pub(crate) fn extract_underscored_css_imports(text: &str) -> Vec<&str> { pub(crate) fn extract_underscored_css_imports(text: &str) -> Vec<&str> {
UNDERSCORED_CSS_IMPORTS UNDERSCORED_CSS_IMPORTS
.captures_iter(text) .captures_iter(text)