Add apkg export on backend

This commit is contained in:
RumovZ 2022-03-27 10:42:26 +02:00
parent 5dab7ed47e
commit 566973146f
16 changed files with 544 additions and 34 deletions

View file

@ -0,0 +1,339 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
sync::Arc,
};
use rusqlite::{named_params, params};
use tempfile::NamedTempFile;
use crate::{
collection::CollectionBuilder,
import_export::package::{
colpkg::export::{export_collection, MediaIter},
Meta,
},
io::{atomic_rename, tempfile_in_parent_of},
latex::extract_latex,
notetype::CardTemplate,
prelude::*,
storage::{ids_to_string, SchemaVersion, SqliteStorage},
tags::matcher::TagMatcher,
text::{
extract_media_refs, extract_underscored_css_imports, extract_underscored_references,
is_remote_filename,
},
};
impl Collection {
    /// Export the collection (or a single deck) to `out_path` as a legacy
    /// `.apkg` package.
    ///
    /// * `deck_id` - if set, only that deck and its children are exported;
    ///   otherwise all decks except the default are included.
    /// * `include_scheduling` - when false, revlog/deck config tables are
    ///   omitted, deck config ids are reset, and cards are returned to the
    ///   new state.
    /// * `include_media` - when true, media files referenced by the exported
    ///   notes and notetypes are bundled into the package.
    /// * `progress_fn` - called with the index of each media file written.
    pub fn export_apkg(
        &mut self,
        out_path: impl AsRef<Path>,
        deck_id: Option<DeckId>,
        include_scheduling: bool,
        include_media: bool,
        progress_fn: impl FnMut(usize),
    ) -> Result<()> {
        // Stage the zip next to the destination so the final atomic_rename
        // cannot cross a filesystem boundary.
        let temp_apkg = tempfile_in_parent_of(out_path.as_ref())?;
        let mut temp_col = NamedTempFile::new()?;
        let temp_col_path = temp_col
            .path()
            .to_str()
            .ok_or_else(|| AnkiError::IoError("tempfile with non-unicode name".into()))?;
        let media = self.export_collection_extracting_media(
            temp_col_path,
            deck_id,
            include_scheduling,
            include_media,
        )?;
        let col_size = temp_col.as_file().metadata()?.len() as usize;
        // Legacy meta keeps the package importable by older clients.
        export_collection(
            Meta::new_legacy(),
            temp_apkg.path(),
            &mut temp_col,
            col_size,
            media,
            &self.tr,
            progress_fn,
        )?;
        // Move the finished file into place only after everything succeeded.
        atomic_rename(temp_apkg, out_path.as_ref(), true)
    }

    /// Build a pruned copy of this collection at `path`, optionally strip
    /// scheduling data from it, and collect the media paths it references.
    fn export_collection_extracting_media(
        &mut self,
        path: &str,
        deck_id: Option<DeckId>,
        include_scheduling: bool,
        include_media: bool,
    ) -> Result<MediaIter> {
        // Create an empty schema at `path`, then close it so the file can be
        // ATTACHed to our own connection below.
        CollectionBuilder::new(path).build()?.close(None)?;
        self.export_into_other(path, deck_id, include_scheduling)?;
        // Reopen the copy as its own collection for post-processing.
        let mut temp_col = CollectionBuilder::new(path).build()?;
        if !include_scheduling {
            temp_col.remove_scheduling_information()?;
        }
        let mut media = HashSet::new();
        if include_media {
            temp_col.extract_media_paths(&mut media)?;
        }
        // Downgrade to schema 11 for compatibility with older clients.
        temp_col.close(Some(SchemaVersion::V11))?;
        Ok(MediaIter::from_file_list(media))
    }

    /// ATTACH the target database, copy the relevant rows, and DETACH again.
    fn export_into_other(
        &mut self,
        other_path: &str,
        deck_id: Option<DeckId>,
        export_scheduling_tables: bool,
    ) -> Result<()> {
        self.storage
            .db
            .execute("ATTACH ? AS other", params!(other_path))?;
        // Capture the result so DETACH runs even when the copy failed.
        let res = self.export_into_other_inner(deck_id, export_scheduling_tables);
        self.storage.db.execute_batch("DETACH other")?;
        res
    }

    /// Copy decks first, then the cards in those decks, then the notes and
    /// notetypes those cards depend on; the order matters because each step's
    /// SQL filters on rows already present in the attached database.
    fn export_into_other_inner(
        &mut self,
        deck_id: Option<DeckId>,
        export_scheduling_tables: bool,
    ) -> Result<()> {
        self.export_decks(deck_id)?;
        self.storage.export_cards(deck_id)?;
        self.storage.export_notes()?;
        self.storage.export_notetypes()?;
        if export_scheduling_tables {
            self.storage.export_revlog()?;
            self.storage.export_deck_configs()?;
        }
        Ok(())
    }

    /// Copy either all decks (minus the default) or one deck subtree into
    /// the attached database.
    fn export_decks(&mut self, deck_id: Option<DeckId>) -> Result<()> {
        let sql = if let Some(did) = deck_id {
            self.export_deck_sql(did)?
        } else {
            include_str!("export_decks.sql").into()
        };
        self.storage.db.execute_batch(&sql)?;
        Ok(())
    }

    /// Build the single-deck variant of the deck export statement by
    /// appending an id filter covering the deck and all of its children.
    fn export_deck_sql(&mut self, did: DeckId) -> Result<String> {
        let mut sql = format!("{} AND id IN ", include_str!("export_decks.sql"));
        let deck = self.get_deck(did)?.ok_or(AnkiError::NotFound)?;
        let ids = self.storage.deck_id_with_children(&deck)?;
        ids_to_string(&mut sql, &ids);
        Ok(sql)
    }

    /// Strip scheduling state from this (temporary) collection: system tags,
    /// custom deck configs, and per-card scheduling data.
    fn remove_scheduling_information(&mut self) -> Result<()> {
        self.storage.remove_system_tags()?;
        self.reset_deck_config_ids()?;
        self.reset_cards()
    }

    /// Point every deck at the default config (id 1), since custom configs
    /// are not exported when scheduling is excluded.
    fn reset_deck_config_ids(&mut self) -> Result<()> {
        for mut deck in self.storage.get_all_decks()? {
            deck.normal_mut()?.config_id = 1;
            self.update_deck(&mut deck)?;
        }
        Ok(())
    }

    /// Return all non-new cards to the new state, then clear remaining
    /// scheduling columns via SQL.
    fn reset_cards(&mut self) -> Result<()> {
        let cids = self.storage.get_non_new_card_ids()?;
        self.reschedule_cards_as_new(&cids, false, true, false, None)?;
        self.storage
            .db
            .execute_batch(include_str!("reset_cards.sql"))?;
        Ok(())
    }

    /// Collect absolute paths of media files referenced by notes and
    /// notetypes into `names`.
    fn extract_media_paths(&mut self, names: &mut HashSet<PathBuf>) -> Result<()> {
        let notetypes = self.get_all_notetypes()?;
        self.extract_media_paths_from_notes(names, &notetypes)?;
        self.extract_media_paths_from_notetypes(names, &notetypes);
        Ok(())
    }

    /// Scan every note's fields for media/LaTeX references.
    fn extract_media_paths_from_notes(
        &mut self,
        names: &mut HashSet<PathBuf>,
        notetypes: &HashMap<NotetypeId, Arc<Notetype>>,
    ) -> Result<()> {
        let mut stmt = self.storage.db.prepare("SELECT flds, mid FROM notes")?;
        let mut rows = stmt.query([])?;
        while let Some(row) = rows.next()? {
            let flds = row.get_ref(0)?.as_str()?;
            let notetype_id: NotetypeId = row.get(1)?;
            // NOTE(review): assumes every note's mid exists in the map; a
            // note with a dangling notetype id would panic here — confirm
            // that can't happen in an exported collection.
            self.extract_media_paths_from_note(names, flds, notetypes.get(&notetype_id).unwrap());
        }
        Ok(())
    }

    /// Record media referenced by a single note's fields: LaTeX images and
    /// plain media references (sounds/images).
    fn extract_media_paths_from_note(
        &self,
        names: &mut HashSet<PathBuf>,
        flds: &str,
        notetype: &Notetype,
    ) {
        self.extract_latex_paths(names, flds, notetype);
        for media_ref in extract_media_refs(flds) {
            if is_local_base_name(&media_ref.fname_decoded) {
                names.insert(self.media_folder.join(media_ref.fname_decoded.as_ref()));
            }
        }
    }

    /// Record generated LaTeX image files; only paths that actually exist
    /// are added, as the images may not have been rendered yet.
    fn extract_latex_paths(&self, names: &mut HashSet<PathBuf>, flds: &str, notetype: &Notetype) {
        for latex in extract_latex(flds, notetype.config.latex_svg).1 {
            if is_local_base_name(&latex.fname) {
                let path = self.media_folder.join(&latex.fname);
                if path.exists() {
                    names.insert(path);
                }
            }
        }
    }

    /// Record media referenced from notetype CSS and card templates.
    fn extract_media_paths_from_notetypes(
        &mut self,
        names: &mut HashSet<PathBuf>,
        notetypes: &HashMap<NotetypeId, Arc<Notetype>>,
    ) {
        for notetype in notetypes.values() {
            notetype.extract_media_paths(names, &self.media_folder);
        }
    }
}
/// True if `name` is a plain local file name (no URL scheme, no directory
/// component), i.e. something we can expect to find in the media folder.
fn is_local_base_name(name: &str) -> bool {
    // `Path::parent()` returns `Some("")` for a bare file name like
    // "foo.jpg", and `None` only for roots or the empty string — so the
    // previous `.parent().is_none()` check rejected every valid media file
    // name. A base name is one whose parent is the empty path.
    !is_remote_filename(name)
        && Path::new(name)
            .parent()
            .map_or(false, |parent| parent.as_os_str().is_empty())
}
impl Notetype {
    /// Collect the full paths of underscore-prefixed media files referenced
    /// by this notetype's CSS and by each of its card templates.
    fn extract_media_paths(&self, names: &mut HashSet<PathBuf>, media_folder: &Path) {
        let css_imports = extract_underscored_css_imports(&self.config.css);
        names.extend(
            css_imports
                .into_iter()
                .filter(|import| is_local_base_name(import))
                .map(|import| media_folder.join(import)),
        );
        self.templates
            .iter()
            .for_each(|template| template.extract_media_paths(names, media_folder));
    }
}
impl CardTemplate {
    /// Collect underscore-prefixed media references from both the question
    /// and answer side of this template. The extraction regex casts a wide
    /// net, so only paths that actually exist on disk are recorded.
    fn extract_media_paths(&self, names: &mut HashSet<PathBuf>, media_folder: &Path) {
        let sides = [&self.config.q_format, &self.config.a_format];
        for side in sides {
            for reference in extract_underscored_references(side) {
                if !is_local_base_name(reference) {
                    continue;
                }
                let path = media_folder.join(reference);
                // extraction is a shotgun approach; skip anything that
                // doesn't actually exist on disk
                if path.exists() {
                    names.insert(path);
                }
            }
        }
    }
}
impl SqliteStorage {
    /// Copy the cards of the already-exported decks into the attached
    /// database; when exporting a single deck, also pull in siblings that
    /// live outside it.
    fn export_cards(&mut self, deck_id: Option<DeckId>) -> Result<()> {
        self.db.execute_batch(include_str!("export_cards.sql"))?;
        if let Some(did) = deck_id {
            // include siblings outside the exported deck, because they would
            // get created on import anyway
            self.db.execute(
                include_str!("export_siblings.sql"),
                named_params! {"did": did},
            )?;
        }
        Ok(())
    }

    /// Copy the notes belonging to the exported cards.
    fn export_notes(&mut self) -> Result<()> {
        self.db.execute_batch(include_str!("export_notes.sql"))?;
        Ok(())
    }

    /// Copy the notetypes used by the exported notes. The target collection
    /// was freshly created, which is presumably why its stock notetypes are
    /// cleared first.
    fn export_notetypes(&mut self) -> Result<()> {
        self.db.execute_batch("DELETE FROM other.notetypes")?;
        self.db
            .execute_batch(include_str!("export_notetypes.sql"))?;
        Ok(())
    }

    /// Copy the review history of the exported cards.
    fn export_revlog(&mut self) -> Result<()> {
        self.db.execute_batch(include_str!("export_revlog.sql"))?;
        Ok(())
    }

    /// Copy the deck configs referenced by the exported decks.
    fn export_deck_configs(&mut self) -> Result<()> {
        let id_string = self.exported_deck_config_ids()?;
        // NOTE(review): `id_string` is a pre-rendered "(1,2,...)" string
        // bound to a single `:ids` parameter; SQLite does not expand one
        // bound parameter into an IN list — confirm the SQL behaves as
        // intended.
        self.db.execute(
            include_str!("export_deck_configs.sql"),
            named_params! {"ids": id_string},
        )?;
        Ok(())
    }

    /// Render the config ids of all exported decks as a '(x,y,...)' string.
    fn exported_deck_config_ids(&mut self) -> Result<String> {
        let all_decks = self.get_all_decks()?;
        let exported_deck_ids = self.exported_deck_ids()?;
        let ids = all_decks
            .iter()
            .filter(|deck| exported_deck_ids.contains(&deck.id))
            // config_id() presumably returns None for decks without a normal
            // config (e.g. filtered decks) — those are skipped
            .filter_map(|deck| deck.config_id());
        let mut id_string = String::new();
        ids_to_string(&mut id_string, ids);
        Ok(id_string)
    }

    /// Ids of the decks already copied into the attached database.
    fn exported_deck_ids(&mut self) -> Result<HashSet<DeckId>> {
        self.db
            .prepare("SELECT DISTINCT id FROM other.decks")?
            .query_and_then([], |row| Ok(DeckId(row.get(0)?)))?
            .collect()
    }

    /// Remove the system tags 'marked' and 'leech' from all notes,
    /// rewriting only rows whose tag string actually matches.
    fn remove_system_tags(&mut self) -> Result<()> {
        let mut matcher = TagMatcher::new("marked leech")?;
        let mut rows_stmt = self.db.prepare("SELECT id, tags FROM notes")?;
        let mut update_stmt = self
            .db
            .prepare_cached("UPDATE notes SET tags = ? WHERE id = ?")?;
        let mut rows = rows_stmt.query(params![])?;
        while let Some(row) = rows.next()? {
            let tags = row.get_ref(1)?.as_str()?;
            if matcher.is_match(tags) {
                let new_tags = matcher.remove(tags);
                let note_id: NoteId = row.get(0)?;
                update_stmt.execute(params![new_tags, note_id])?;
            }
        }
        Ok(())
    }

    /// Ids of all cards that are not in the new state (non-zero queue or
    /// type), i.e. those that need rescheduling before a no-scheduling
    /// export.
    fn get_non_new_card_ids(&self) -> Result<Vec<CardId>> {
        self.db
            .prepare(include_str!("non_new_cards.sql"))?
            .query_and_then([], |row| Ok(CardId(row.get(0)?)))?
            .collect()
    }
}

View file

@ -0,0 +1,7 @@
-- Copy into the attached target only the cards whose deck was previously
-- copied into other.decks.
INSERT INTO other.cards
SELECT *
FROM cards
WHERE did IN (
    -- the decks table's primary key is `id`; the previous `SELECT did`
    -- resolved as a correlated reference to the outer cards.did, matching
    -- every card in the collection
    SELECT id
    FROM other.decks
  )

View file

@ -0,0 +1,4 @@
-- Copy the deck configs referenced by the exported decks.
-- NOTE(review): :ids is bound to a single pre-rendered "(1,2,...)" string
-- by the Rust caller; SQLite does not expand one bound parameter into an
-- IN list — confirm this statement parses and filters as intended.
INSERT INTO other.deck_config
SELECT *
FROM deck_config
WHERE id IN :ids

View file

@ -0,0 +1,4 @@
-- Copy all decks except the default deck (id 1) into the target.
-- The Rust caller may append " AND id IN (...)" to this text when exporting
-- a single deck subtree, so it must not end in a semicolon or comment.
INSERT INTO other.decks
SELECT *
FROM decks
WHERE id != 1

View file

@ -0,0 +1,7 @@
-- Copy the notes belonging to any previously exported card.
INSERT INTO other.notes
SELECT *
FROM notes
WHERE id IN (
    SELECT DISTINCT nid
    FROM other.cards
  )

View file

@ -0,0 +1,7 @@
-- Copy the notetypes used by the previously exported notes.
INSERT INTO other.notetypes
SELECT *
FROM notetypes
WHERE id IN (
    SELECT DISTINCT mid
    FROM other.notes
  )

View file

@ -0,0 +1,7 @@
-- Copy the review history of the exported cards only.
INSERT INTO other.revlog
SELECT *
FROM revlog
WHERE cid IN (
    -- the cards table's primary key is `id`; the previous `SELECT cid`
    -- resolved as a correlated reference to the outer revlog.cid, matching
    -- the entire review log
    SELECT id
    FROM other.cards
  )

View file

@ -0,0 +1,30 @@
-- Include siblings of exported cards that live outside the exported deck,
-- relocating them into the target deck (:did).
-- The column list must not be wrapped in parentheses: SQLite would parse
-- `SELECT (a, b, ...)` as a single row value and fail with
-- "row value misused".
INSERT INTO other.cards
SELECT id,
  nid,
  -- move the sibling into the exported deck
  :did,
  ord,
  mod,
  usn,
  type,
  queue,
  due,
  ivl,
  factor,
  reps,
  lapses,
  left,
  odue,
  odid,
  flags,
  data
FROM cards
WHERE id NOT IN (
    SELECT id
    FROM other.cards
  )
  AND nid IN (
    SELECT DISTINCT nid
    FROM other.cards
  )

View file

@ -0,0 +1,4 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
mod export;

View file

@ -0,0 +1,4 @@
-- Ids of all cards not in the new state (queue 0 and type 0 mean new).
SELECT id
FROM cards
WHERE queue != 0
  OR type != 0

View file

@ -0,0 +1,8 @@
-- Strip remaining scheduling state from every card: clear rep/lapse counts,
-- filtered-deck origins (odid/odue) and flags, and return cards to the new
-- queue/type. The Rust caller reschedules non-new cards first.
UPDATE cards
SET reps = 0,
  lapses = 0,
  odid = 0,
  odue = 0,
  queue = 0,
  type = 0,
  flags = 0

View file

@ -4,7 +4,8 @@
use std::{
borrow::Cow,
collections::HashMap,
fs::{DirEntry, File},
ffi::OsStr,
fs::File,
io::{self, Read, Write},
path::{Path, PathBuf},
};
@ -67,6 +68,24 @@ impl Collection {
}
}
pub(crate) struct MediaIter(Box<dyn Iterator<Item = io::Result<PathBuf>>>);
impl MediaIter {
pub(crate) fn from_folder(path: &Path) -> Result<Self> {
Ok(Self(Box::new(
read_dir_files(path)?.map(|res| res.map(|entry| entry.path())),
)))
}
pub(crate) fn from_file_list(list: impl IntoIterator<Item = PathBuf> + 'static) -> Self {
Self(Box::new(list.into_iter().map(Ok)))
}
pub(crate) fn empty() -> Self {
Self(Box::new(std::iter::empty()))
}
}
fn export_collection_file(
out_path: impl AsRef<Path>,
col_path: impl AsRef<Path>,
@ -82,12 +101,18 @@ fn export_collection_file(
};
let mut col_file = File::open(col_path)?;
let col_size = col_file.metadata()?.len() as usize;
let media = if let Some(path) = media_dir {
MediaIter::from_folder(&path)?
} else {
MediaIter::empty()
};
export_collection(
meta,
out_path,
&mut col_file,
col_size,
media_dir,
media,
tr,
progress_fn,
)
@ -105,18 +130,18 @@ pub(crate) fn export_colpkg_from_data(
out_path,
&mut col_data,
col_size,
None,
MediaIter::empty(),
tr,
|_| (),
)
}
fn export_collection(
pub(crate) fn export_collection(
meta: Meta,
out_path: impl AsRef<Path>,
col: &mut impl Read,
col_size: usize,
media_dir: Option<PathBuf>,
media: MediaIter,
tr: &I18n,
progress_fn: impl FnMut(usize),
) -> Result<()> {
@ -129,7 +154,7 @@ fn export_collection(
zip.write_all(&meta_bytes)?;
write_collection(&meta, &mut zip, col, col_size)?;
write_dummy_collection(&mut zip, tr)?;
write_media(&meta, &mut zip, media_dir, progress_fn)?;
write_media(&meta, &mut zip, media, progress_fn)?;
zip.finish()?;
Ok(())
@ -203,17 +228,12 @@ fn zstd_copy(reader: &mut impl Read, writer: &mut impl Write, size: usize) -> Re
fn write_media(
meta: &Meta,
zip: &mut ZipWriter<File>,
media_dir: Option<PathBuf>,
media: MediaIter,
progress_fn: impl FnMut(usize),
) -> Result<()> {
let mut media_entries = vec![];
if let Some(media_dir) = media_dir {
write_media_files(meta, zip, &media_dir, &mut media_entries, progress_fn)?;
}
write_media_files(meta, zip, media, &mut media_entries, progress_fn)?;
write_media_map(meta, media_entries, zip)?;
Ok(())
}
@ -251,19 +271,22 @@ fn write_media_map(
fn write_media_files(
meta: &Meta,
zip: &mut ZipWriter<File>,
dir: &Path,
media: MediaIter,
media_entries: &mut Vec<MediaEntry>,
mut progress_fn: impl FnMut(usize),
) -> Result<()> {
let mut copier = MediaCopier::new(meta);
for (index, entry) in read_dir_files(dir)?.enumerate() {
for (index, res) in media.0.enumerate() {
let path = res?;
progress_fn(index);
zip.start_file(index.to_string(), file_options_stored())?;
let entry = entry?;
let name = normalized_unicode_file_name(&entry)?;
let mut file = File::open(entry.path())?;
let mut file = File::open(&path)?;
let file_name = path
.file_name()
.ok_or_else(|| AnkiError::invalid_input("not a file path"))?;
let name = normalized_unicode_file_name(file_name)?;
let (size, sha1) = copier.copy(&mut file, zip)?;
media_entries.push(MediaEntry::new(name, size, sha1));
@ -282,12 +305,11 @@ impl MediaEntry {
}
}
fn normalized_unicode_file_name(entry: &DirEntry) -> Result<String> {
let filename = entry.file_name();
fn normalized_unicode_file_name(filename: &OsStr) -> Result<String> {
let filename = filename.to_str().ok_or_else(|| {
AnkiError::IoError(format!(
"non-unicode file name: {}",
entry.file_name().to_string_lossy()
filename.to_string_lossy()
))
})?;
filename_if_normalized(filename)

View file

@ -1,6 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
mod apkg;
mod colpkg;
mod meta;

View file

@ -34,9 +34,10 @@ impl SchemaVersion {
}
/// Write a list of IDs as '(x,y,...)' into the provided string.
pub(crate) fn ids_to_string<T>(buf: &mut String, ids: &[T])
pub(crate) fn ids_to_string<D, I>(buf: &mut String, ids: I)
where
T: std::fmt::Display,
D: std::fmt::Display,
I: IntoIterator<Item = D>,
{
buf.push('(');
write_comma_separated_ids(buf, ids);
@ -44,15 +45,18 @@ where
}
/// Write a list of Ids as 'x,y,...' into the provided string.
pub(crate) fn write_comma_separated_ids<T>(buf: &mut String, ids: &[T])
pub(crate) fn write_comma_separated_ids<D, I>(buf: &mut String, ids: I)
where
T: std::fmt::Display,
D: std::fmt::Display,
I: IntoIterator<Item = D>,
{
if !ids.is_empty() {
for id in ids.iter().skip(1) {
write!(buf, "{},", id).unwrap();
}
write!(buf, "{}", ids[0]).unwrap();
let mut trailing_sep = false;
for id in ids {
write!(buf, "{},", id).unwrap();
trailing_sep = true;
}
if trailing_sep {
buf.pop();
}
}
@ -73,17 +77,17 @@ mod test {
#[test]
fn ids_string() {
let mut s = String::new();
ids_to_string::<u8>(&mut s, &[]);
ids_to_string(&mut s, &[0; 0]);
assert_eq!(s, "()");
s.clear();
ids_to_string(&mut s, &[7]);
assert_eq!(s, "(7)");
s.clear();
ids_to_string(&mut s, &[7, 6]);
assert_eq!(s, "(6,7)");
assert_eq!(s, "(7,6)");
s.clear();
ids_to_string(&mut s, &[7, 6, 5]);
assert_eq!(s, "(6,5,7)");
assert_eq!(s, "(7,6,5)");
s.clear();
}
}

View file

@ -4,7 +4,7 @@
mod bulkadd;
mod complete;
mod findreplace;
mod matcher;
pub(crate) mod matcher;
mod notes;
mod register;
mod remove;

View file

@ -115,6 +115,36 @@ lazy_static! {
|
\[\[type:[^]]+\]\]
").unwrap();
/// Files included in CSS with a leading underscore.
static ref UNDERSCORED_CSS_IMPORTS: Regex = Regex::new(
r#"(?xi)
(?:@import\s+ # import statement with a bare
"(_[^"]*.css)" # double quoted
| # or
'(_[^']*.css)' # single quoted css filename
)
| # or
(?:url\(\s* # a url function with a
"(_[^"]+)" # double quoted
| # or
'(_[^']+)' # single quoted
| # or
(_.+) # unquoted filename
\s*\))
"#).unwrap();
/// Strings, src and data attributes with a leading underscore.
static ref UNDERSCORED_REFERENCES: Regex = Regex::new(
r#"(?x)
"(_[^"]+)" # double quoted
| # or
'(_[^']+)' # single quoted string
| # or
\b(?:src|data) # a 'src' or 'data' attribute
= # followed by
(_[^ >]+) # an unquoted value
"#).unwrap();
}
pub fn html_to_text_line(html: &str) -> Cow<str> {
@ -216,6 +246,34 @@ pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
out
}
pub(crate) fn extract_underscored_css_imports(text: &str) -> Vec<&str> {
UNDERSCORED_CSS_IMPORTS
.captures_iter(text)
.map(|caps| {
caps.get(1)
.or_else(|| caps.get(2))
.or_else(|| caps.get(3))
.or_else(|| caps.get(4))
.or_else(|| caps.get(5))
.unwrap()
.as_str()
})
.collect()
}
pub(crate) fn extract_underscored_references(text: &str) -> Vec<&str> {
UNDERSCORED_REFERENCES
.captures_iter(text)
.map(|caps| {
caps.get(1)
.or_else(|| caps.get(2))
.or_else(|| caps.get(3))
.unwrap()
.as_str()
})
.collect()
}
pub fn strip_html_preserving_media_filenames(html: &str) -> Cow<str> {
let without_fnames = HTML_MEDIA_TAGS.replace_all(html, r" ${1}${2}${3} ");
let without_html = strip_html(&without_fnames);
@ -379,6 +437,10 @@ lazy_static! {
pub(crate) static ref REMOTE_FILENAME: Regex = Regex::new("(?i)^https?://").unwrap();
}
pub(crate) fn is_remote_filename(name: &str) -> bool {
REMOTE_FILENAME.is_match(name)
}
/// IRI-encode unescaped local paths in HTML fragment.
pub(crate) fn encode_iri_paths(unescaped_html: &str) -> Cow<str> {
transform_html_paths(unescaped_html, |fname| {