Gather export data instead of copying directly

RumovZ 2022-03-29 16:47:57 +02:00
parent 7d1686e272
commit 84c5ac8a0a
15 changed files with 388 additions and 354 deletions

View file

@@ -0,0 +1,284 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
};
use itertools::Itertools;
use crate::{
card::{CardQueue, CardType},
decks::NormalDeck,
latex::extract_latex,
prelude::*,
revlog::RevlogEntry,
search::{Negated, SearchNode, SortMode},
storage::ids_to_string,
text::{
extract_media_refs, extract_underscored_css_imports, extract_underscored_references,
is_remote_filename,
},
};
#[derive(Debug, Default)]
pub(super) struct ExportData {
pub(super) decks: Vec<Deck>,
pub(super) notes: Vec<Note>,
pub(super) cards: Vec<Card>,
pub(super) notetypes: Vec<Notetype>,
pub(super) revlog: Vec<RevlogEntry>,
pub(super) decks_configs: Vec<DeckConfig>,
pub(super) media_paths: HashSet<PathBuf>,
}
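/// Builds a search for cards that belong to the gathered notes, but were not
/// gathered themselves, i.e. siblings outside the selected deck.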
fn sibling_search(notes: &[Note], cards: &[Card]) -> SearchBuilder {
let mut nids = String::new();
ids_to_string(&mut nids, notes.iter().map(|note| note.id));
let mut cids = String::new();
ids_to_string(&mut cids, cards.iter().map(|card| card.id));
SearchBuilder::from(SearchNode::NoteIds(nids)).and(SearchNode::CardIds(cids).negated())
}
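/// Restricts the search to the given deck and its child decks, or matches the
/// whole collection if no deck is given.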
fn optional_deck_search(deck_id: Option<DeckId>) -> SearchNode {
if let Some(did) = deck_id {
SearchNode::from_deck_id(did, true)
} else {
SearchNode::WholeCollection
}
}
fn is_local_base_name(name: &str) -> bool {
!is_remote_filename(name) && Path::new(name).parent().is_none()
}
impl ExportData {
/*
pub(super) fn new(with_scheduling: bool, media_folder: Option<PathBuf>) -> Self {
Self {
with_scheduling,
media_folder,
..Default::default()
}
}
*/
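/// Gathers the decks, notes, cards and notetypes to be exported, plus revlog
/// entries and deck configs if scheduling is kept; otherwise scheduling
/// information is stripped from the gathered data.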
pub(super) fn gather_data(
&mut self,
col: &mut Collection,
deck_id: Option<DeckId>,
with_scheduling: bool,
) -> Result<()> {
self.decks = col.gather_decks(deck_id)?;
let search = optional_deck_search(deck_id);
self.notes = col.gather_notes(search.clone())?;
self.cards = col.gather_cards(search, &self.notes, deck_id)?;
self.notetypes = col.gather_notetypes(&self.notes)?;
if with_scheduling {
self.revlog = col.gather_revlog(&self.cards)?;
self.decks_configs = col.gather_deck_configs(&self.decks)?;
} else {
self.remove_scheduling_information(col);
};
Ok(())
}
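/// Collects the filesystem paths of all local media referenced by the
/// gathered notes (fields and LaTeX) and notetypes (CSS and templates).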
pub(super) fn gather_media_paths(&mut self, media_folder: &Path) {
let mut inserter = |name: &str| {
if is_local_base_name(name) {
self.media_paths.insert(media_folder.join(name));
}
};
let svg_getter = svg_getter(&self.notetypes);
for note in self.notes.iter() {
gather_media_paths_from_note(note, &mut inserter, &svg_getter);
}
for notetype in self.notetypes.iter() {
gather_media_paths_from_notetype(notetype, &mut inserter);
}
}
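/// Strips scheduling state from the gathered data: drops the marked/leech
/// tags, points all decks at the default config, and resets all cards to new.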
fn remove_scheduling_information(&mut self, col: &Collection) {
self.remove_system_tags();
self.reset_deck_config_ids();
self.reset_cards(col);
}
fn remove_system_tags(&mut self) {
// TODO: case folding? child tags?
for note in self.notes.iter_mut() {
note.tags = std::mem::take(&mut note.tags)
.into_iter()
.filter(|tag| !matches!(tag.as_str(), "marked" | "leech"))
.collect();
}
}
fn reset_deck_config_ids(&mut self) {
for deck in self.decks.iter_mut() {
if let Ok(normal_mut) = deck.normal_mut() {
normal_mut.config_id = 1;
} else {
// TODO: scheduling case
deck.kind = DeckKind::Normal(NormalDeck {
config_id: 1,
..Default::default()
})
}
}
}
fn reset_cards(&mut self, col: &Collection) {
let mut position = col.get_next_card_position();
for card in self.cards.iter_mut() {
if card.ctype != CardType::New || card.queue != CardQueue::New {
card.due = card.original_position.unwrap_or_else(|| {
position += 1;
position - 1
}) as i32;
}
card.interval = 0;
card.ease_factor = 0;
card.reps = 0;
card.lapses = 0;
card.original_deck_id = DeckId(0);
card.original_due = 0;
card.original_position = None;
card.queue = CardQueue::New;
card.ctype = CardType::New;
card.flags = 0;
}
}
}
fn gather_media_paths_from_note(
note: &Note,
inserter: &mut impl FnMut(&str),
svg_getter: &impl Fn(NotetypeId) -> bool,
) {
for field in note.fields() {
for media_ref in extract_media_refs(field) {
inserter(&media_ref.fname_decoded);
}
for latex in extract_latex(field, svg_getter(note.notetype_id)).1 {
inserter(&latex.fname);
}
}
}
fn gather_media_paths_from_notetype(notetype: &Notetype, inserter: &mut impl FnMut(&str)) {
for name in extract_underscored_css_imports(&notetype.config.css) {
inserter(name);
}
for template in &notetype.templates {
for template_side in [&template.config.q_format, &template.config.a_format] {
for name in extract_underscored_references(template_side) {
inserter(name);
}
}
}
}
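/// Returns a lookup closure from notetype id to that notetype's `latex_svg`
/// setting, defaulting to false for unknown notetypes.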
fn svg_getter(notetypes: &[Notetype]) -> impl Fn(NotetypeId) -> bool {
let svg_map: HashMap<NotetypeId, bool> = notetypes
.iter()
.map(|nt| (nt.id, nt.config.latex_svg))
.collect();
move |nt_id| svg_map.get(&nt_id).copied().unwrap_or_default()
}
impl Collection {
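/// Gathers the deck with the given id and its children, or all decks if no
/// id is given. The default deck (id 1) is excluded either way.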
fn gather_decks(&mut self, deck_id: Option<DeckId>) -> Result<Vec<Deck>> {
if let Some(did) = deck_id {
let deck = self.get_deck(did)?.ok_or(AnkiError::NotFound)?;
self.storage
.deck_id_with_children(&deck)?
.iter()
.filter(|did| **did != DeckId(1))
.map(|did| self.storage.get_deck(*did)?.ok_or(AnkiError::NotFound))
.collect()
} else {
Ok(self
.storage
.get_all_decks()?
.into_iter()
.filter(|deck| deck.id != DeckId(1))
.collect())
}
}
fn gather_notes(&mut self, search: SearchNode) -> Result<Vec<Note>> {
self.search_notes(search, SortMode::NoOrder)?
.iter()
.map(|nid| self.storage.get_note(*nid)?.ok_or(AnkiError::NotFound))
.collect()
}
fn gather_cards(
&mut self,
search: SearchNode,
notes: &[Note],
deck_id: Option<DeckId>,
) -> Result<Vec<Card>> {
let mut cards: Vec<_> = self
.search_cards(search, SortMode::NoOrder)?
.iter()
.map(|cid| self.storage.get_card(*cid)?.ok_or(AnkiError::NotFound))
.collect::<Result<_>>()?;
if let Some(did) = deck_id {
let mut siblings = self.gather_siblings(notes, &cards, did)?;
cards.append(&mut siblings);
}
Ok(cards)
}
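/// Gathers the siblings matched by [sibling_search] and moves them into the
/// exported deck, since they would be recreated on import anyway.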
fn gather_siblings(
&mut self,
notes: &[Note],
cards: &[Card],
deck_id: DeckId,
) -> Result<Vec<Card>> {
self.search_cards(sibling_search(notes, cards), SortMode::NoOrder)?
.iter()
.map(|cid| {
let mut card = self.storage.get_card(*cid)?.ok_or(AnkiError::NotFound)?;
card.deck_id = deck_id;
Ok(card)
})
.collect()
}
fn gather_notetypes(&mut self, notes: &[Note]) -> Result<Vec<Notetype>> {
notes
.iter()
.map(|note| note.notetype_id)
.unique()
.map(|ntid| self.storage.get_notetype(ntid)?.ok_or(AnkiError::NotFound))
.collect()
}
fn gather_revlog(&mut self, cards: &[Card]) -> Result<Vec<RevlogEntry>> {
let mut cids = String::new();
ids_to_string(&mut cids, cards.iter().map(|card| card.id));
self.storage.get_revlog_entries_for_card_ids(cids)
}
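/// Gathers the non-default deck configs referenced by the gathered decks.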
fn gather_deck_configs(&mut self, decks: &[Deck]) -> Result<Vec<DeckConfig>> {
decks
.iter()
.filter_map(|deck| deck.config_id())
.unique()
.filter(|config_id| *config_id != DeckConfigId(1))
.map(|config_id| {
self.storage
.get_deck_config(config_id)?
.ok_or(AnkiError::NotFound)
})
.collect()
}
}

View file

@@ -0,0 +1,61 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use super::gather::ExportData;
use crate::{prelude::*, revlog::RevlogEntry};
impl Collection {
pub(super) fn insert_data(&mut self, data: &ExportData) -> Result<()> {
self.insert_decks(&data.decks)?;
self.insert_notes(&data.notes)?;
self.insert_cards(&data.cards)?;
self.insert_notetypes(&data.notetypes)?;
self.insert_revlog(&data.revlog)?;
self.insert_deck_configs(&data.decks_configs)?;
Ok(())
}
fn insert_decks(&mut self, decks: &[Deck]) -> Result<()> {
for deck in decks {
self.storage.add_or_update_deck_with_existing_id(deck)?;
}
Ok(())
}
fn insert_notes(&mut self, notes: &[Note]) -> Result<()> {
for note in notes {
self.storage.add_or_update_note(note)?;
}
Ok(())
}
fn insert_cards(&mut self, cards: &[Card]) -> Result<()> {
for card in cards {
self.storage.add_or_update_card(card)?;
}
Ok(())
}
fn insert_notetypes(&mut self, notetypes: &[Notetype]) -> Result<()> {
for notetype in notetypes {
self.storage
.add_or_update_notetype_with_existing_id(notetype)?;
}
Ok(())
}
fn insert_revlog(&mut self, revlog: &[RevlogEntry]) -> Result<()> {
for entry in revlog {
self.storage.add_revlog_entry(entry, false)?;
}
Ok(())
}
fn insert_deck_configs(&mut self, configs: &[DeckConfig]) -> Result<()> {
for config in configs {
self.storage
.add_or_update_deck_config_with_existing_id(config)?;
}
Ok(())
}
}
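
Taken together, the gather and insert steps above replace the SQL-level copying removed further down, mirroring export_collection_extracting_media below. A minimal sketch of the intended flow, assuming an open `col: &mut Collection`; the deck id and paths are illustrative, and the pub(super)/private visibilities mean this only compiles from inside the import_export module:

let mut data = ExportData::default();
data.gather_data(col, Some(DeckId(1234)), false)?; // illustrative deck id; no scheduling
data.gather_media_paths(Path::new("/path/to/collection.media")); // illustrative media folder
let mut temp_col = Collection::new_minimal("/tmp/export.anki2")?; // illustrative output path
temp_col.insert_data(&data)?;
temp_col.close(Some(SchemaVersion::V11))?;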

View file

@@ -1,6 +1,8 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
mod gather;
mod insert;
pub mod package;
#[derive(Debug, Clone, Copy, PartialEq)]

View file

@@ -1,31 +1,22 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
sync::Arc,
};
use std::path::{Path, PathBuf};
use rusqlite::{named_params, params};
use tempfile::NamedTempFile;
use crate::{
collection::CollectionBuilder,
import_export::package::{
import_export::{
gather::ExportData,
package::{
colpkg::export::{export_collection, MediaIter},
Meta,
},
io::{atomic_rename, tempfile_in_parent_of},
latex::extract_latex,
notetype::CardTemplate,
prelude::*,
storage::{ids_to_string, SchemaVersion, SqliteStorage},
tags::matcher::TagMatcher,
text::{
extract_media_refs, extract_underscored_css_imports, extract_underscored_references,
is_remote_filename,
},
io::{atomic_rename, tempfile_in_parent_of},
prelude::*,
storage::SchemaVersion,
};
impl Collection {
@@ -33,8 +24,8 @@ impl Collection {
&mut self,
out_path: impl AsRef<Path>,
deck_id: Option<DeckId>,
include_scheduling: bool,
include_media: bool,
with_scheduling: bool,
with_media: bool,
progress_fn: impl FnMut(usize),
) -> Result<()> {
let temp_apkg = tempfile_in_parent_of(out_path.as_ref())?;
@@ -46,8 +37,8 @@ impl Collection {
let media = self.export_collection_extracting_media(
temp_col_path,
deck_id,
include_scheduling,
include_media,
with_scheduling,
with_media,
)?;
let col_size = temp_col.as_file().metadata()?.len() as usize;
@@ -67,266 +58,25 @@ impl Collection {
&mut self,
path: &str,
deck_id: Option<DeckId>,
include_scheduling: bool,
include_media: bool,
with_scheduling: bool,
with_media: bool,
) -> Result<MediaIter> {
CollectionBuilder::new(path).build()?.close(None)?;
self.export_into_other(path, deck_id, include_scheduling)?;
let mut data = ExportData::default();
data.gather_data(self, deck_id, with_scheduling)?;
if with_media {
data.gather_media_paths(&self.media_folder);
}
let mut temp_col = CollectionBuilder::new(path).build()?;
if !include_scheduling {
temp_col.remove_scheduling_information()?;
}
let mut media = HashSet::new();
if include_media {
temp_col.extract_media_paths(&mut media)?;
}
let mut temp_col = Collection::new_minimal(path)?;
temp_col.insert_data(&data)?;
temp_col.close(Some(SchemaVersion::V11))?;
Ok(MediaIter::from_file_list(media))
Ok(MediaIter::from_file_list(data.media_paths))
}
fn export_into_other(
&mut self,
other_path: &str,
deck_id: Option<DeckId>,
export_scheduling_tables: bool,
) -> Result<()> {
self.storage
.db
.execute("ATTACH ? AS other", params!(other_path))?;
let res = self.export_into_other_inner(deck_id, export_scheduling_tables);
self.storage.db.execute_batch("DETACH other")?;
res
}
fn export_into_other_inner(
&mut self,
deck_id: Option<DeckId>,
export_scheduling_tables: bool,
) -> Result<()> {
self.export_decks(deck_id)?;
self.storage.export_cards(deck_id)?;
self.storage.export_notes()?;
self.storage.export_notetypes()?;
if export_scheduling_tables {
self.storage.export_revlog()?;
self.storage.export_deck_configs()?;
}
Ok(())
}
fn export_decks(&mut self, deck_id: Option<DeckId>) -> Result<()> {
let sql = if let Some(did) = deck_id {
self.export_deck_sql(did)?
} else {
include_str!("export_decks.sql").into()
};
self.storage.db.execute_batch(&sql)?;
Ok(())
}
fn export_deck_sql(&mut self, did: DeckId) -> Result<String> {
let mut sql = format!("{} AND id IN ", include_str!("export_decks.sql"));
let deck = self.get_deck(did)?.ok_or(AnkiError::NotFound)?;
let ids = self.storage.deck_id_with_children(&deck)?;
ids_to_string(&mut sql, &ids);
Ok(sql)
}
fn remove_scheduling_information(&mut self) -> Result<()> {
self.storage.remove_system_tags()?;
self.reset_deck_config_ids()?;
self.reset_cards()
}
fn reset_deck_config_ids(&mut self) -> Result<()> {
for mut deck in self.storage.get_all_decks()? {
deck.normal_mut()?.config_id = 1;
self.update_deck(&mut deck)?;
}
Ok(())
}
fn reset_cards(&mut self) -> Result<()> {
let cids = self.storage.get_non_new_card_ids()?;
self.reschedule_cards_as_new(&cids, false, true, false, None)?;
self.storage
.db
.execute_batch(include_str!("reset_cards.sql"))?;
Ok(())
}
fn extract_media_paths(&mut self, names: &mut HashSet<PathBuf>) -> Result<()> {
let notetypes = self.get_all_notetypes()?;
self.extract_media_paths_from_notes(names, &notetypes)?;
self.extract_media_paths_from_notetypes(names, &notetypes);
Ok(())
}
fn extract_media_paths_from_notes(
&mut self,
names: &mut HashSet<PathBuf>,
notetypes: &HashMap<NotetypeId, Arc<Notetype>>,
) -> Result<()> {
let mut stmt = self.storage.db.prepare("SELECT flds, mid FROM notes")?;
let mut rows = stmt.query([])?;
while let Some(row) = rows.next()? {
let flds = row.get_ref(0)?.as_str()?;
let notetype_id: NotetypeId = row.get(1)?;
self.extract_media_paths_from_note(names, flds, notetypes.get(&notetype_id).unwrap());
}
Ok(())
}
fn extract_media_paths_from_note(
&self,
names: &mut HashSet<PathBuf>,
flds: &str,
notetype: &Notetype,
) {
self.extract_latex_paths(names, flds, notetype);
for media_ref in extract_media_refs(flds) {
if is_local_base_name(&media_ref.fname_decoded) {
names.insert(self.media_folder.join(media_ref.fname_decoded.as_ref()));
}
}
}
fn extract_latex_paths(&self, names: &mut HashSet<PathBuf>, flds: &str, notetype: &Notetype) {
for latex in extract_latex(flds, notetype.config.latex_svg).1 {
if is_local_base_name(&latex.fname) {
names.insert(self.media_folder.join(&latex.fname));
}
}
}
fn extract_media_paths_from_notetypes(
&mut self,
names: &mut HashSet<PathBuf>,
notetypes: &HashMap<NotetypeId, Arc<Notetype>>,
) {
for notetype in notetypes.values() {
notetype.extract_media_paths(names, &self.media_folder);
}
}
}
fn is_local_base_name(name: &str) -> bool {
!is_remote_filename(name) && Path::new(name).parent().is_none()
}
impl Notetype {
fn extract_media_paths(&self, names: &mut HashSet<PathBuf>, media_folder: &Path) {
for name in extract_underscored_css_imports(&self.config.css) {
if is_local_base_name(name) {
names.insert(media_folder.join(name));
}
}
for template in &self.templates {
template.extract_media_paths(names, media_folder);
}
}
}
impl CardTemplate {
fn extract_media_paths(&self, names: &mut HashSet<PathBuf>, media_folder: &Path) {
for template_side in [&self.config.q_format, &self.config.a_format] {
for name in extract_underscored_references(template_side) {
if is_local_base_name(name) {
names.insert(media_folder.join(name));
}
}
}
}
}
impl SqliteStorage {
fn export_cards(&mut self, deck_id: Option<DeckId>) -> Result<()> {
self.db.execute_batch(include_str!("export_cards.sql"))?;
if let Some(did) = deck_id {
// include siblings outside the exported deck, because they would
// get created on import anyway
self.db.execute(
include_str!("export_siblings.sql"),
named_params! {"did": did},
)?;
}
Ok(())
}
fn export_notes(&mut self) -> Result<()> {
self.db.execute_batch(include_str!("export_notes.sql"))?;
Ok(())
}
fn export_notetypes(&mut self) -> Result<()> {
self.db.execute_batch("DELETE FROM other.notetypes")?;
self.db
.execute_batch(include_str!("export_notetypes.sql"))?;
Ok(())
}
fn export_revlog(&mut self) -> Result<()> {
self.db.execute_batch(include_str!("export_revlog.sql"))?;
Ok(())
}
fn export_deck_configs(&mut self) -> Result<()> {
let id_string = self.exported_deck_config_ids()?;
self.db.execute(
include_str!("export_deck_configs.sql"),
named_params! {"ids": id_string},
)?;
Ok(())
}
fn exported_deck_config_ids(&mut self) -> Result<String> {
let all_decks = self.get_all_decks()?;
let exported_deck_ids = self.exported_deck_ids()?;
let ids = all_decks
.iter()
.filter(|deck| exported_deck_ids.contains(&deck.id))
.filter_map(|deck| deck.config_id());
let mut id_string = String::new();
ids_to_string(&mut id_string, ids);
Ok(id_string)
}
fn exported_deck_ids(&mut self) -> Result<HashSet<DeckId>> {
self.db
.prepare("SELECT DISTINCT id FROM other.decks")?
.query_and_then([], |row| Ok(DeckId(row.get(0)?)))?
.collect()
}
fn remove_system_tags(&mut self) -> Result<()> {
let mut matcher = TagMatcher::new("marked leech")?;
let mut rows_stmt = self.db.prepare("SELECT id, tags FROM notes")?;
let mut update_stmt = self
.db
.prepare_cached("UPDATE notes SET tags = ? WHERE id = ?")?;
let mut rows = rows_stmt.query(params![])?;
while let Some(row) = rows.next()? {
let tags = row.get_ref(1)?.as_str()?;
if matcher.is_match(tags) {
let new_tags = matcher.remove(tags);
let note_id: NoteId = row.get(0)?;
update_stmt.execute(params![new_tags, note_id])?;
}
}
Ok(())
}
fn get_non_new_card_ids(&self) -> Result<Vec<CardId>> {
self.db
.prepare(include_str!("non_new_cards.sql"))?
.query_and_then([], |row| Ok(CardId(row.get(0)?)))?
.collect()
}
fn new_minimal(path: impl Into<PathBuf>) -> Result<Self> {
let col = CollectionBuilder::new(path).build()?;
col.storage.db.execute_batch("DELETE FROM notetypes")?;
Ok(col)
}
}

View file

@@ -1,7 +0,0 @@
INSERT INTO other.cards
SELECT *
FROM cards
WHERE did IN (
SELECT did
FROM other.decks
)

View file

@@ -1,4 +0,0 @@
INSERT INTO other.deck_config
SELECT *
FROM deck_config
WHERE id IN :ids

View file

@@ -1,4 +0,0 @@
INSERT INTO other.decks
SELECT *
FROM decks
WHERE id != 1

View file

@@ -1,7 +0,0 @@
INSERT INTO other.notes
SELECT *
FROM notes
WHERE id IN (
SELECT DISTINCT nid
FROM other.cards
)

View file

@@ -1,7 +0,0 @@
INSERT INTO other.notetypes
SELECT *
FROM notetypes
WHERE id IN (
SELECT DISTINCT mid
FROM other.notes
)

View file

@@ -1,7 +0,0 @@
INSERT INTO other.revlog
SELECT *
FROM revlog
WHERE cid IN (
SELECT cid
FROM other.cards
)

View file

@@ -1,30 +0,0 @@
INSERT INTO other.cards
SELECT (
id,
nid,
:did,
ord,
mod,
usn,
type,
queue,
due,
ivl,
factor,
reps,
lapses,
left,
odue,
odid,
flags,
data
)
FROM cards
WHERE id NOT IN (
SELECT id
FROM other.cards
)
AND nid IN (
SELECT DISTINCT nid
FROM other.cards
)

View file

@@ -1,4 +0,0 @@
SELECT id
FROM cards
WHERE queue != 0
OR type != 0

View file

@@ -1,8 +0,0 @@
UPDATE cards
SET reps = 0,
lapses = 0,
odid = 0,
odue = 0,
queue = 0,
type = 0,
flags = 0

View file

@@ -127,6 +127,14 @@ impl Default for SearchBuilder {
}
impl SearchNode {
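/// Construct [SearchNode] matching the given deck, optionally including its
/// child decks.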
pub fn from_deck_id(did: DeckId, with_children: bool) -> Self {
if with_children {
Self::DeckIdWithChildren(did)
} else {
Self::DeckIdWithoutChildren(did)
}
}
/// Construct [SearchNode] from an unescaped deck name.
pub fn from_deck_name(name: &str) -> Self {
Self::Deck(escape_anki_wildcards_for_search_node(name))

View file

@@ -120,6 +120,13 @@ impl SqliteStorage {
.collect()
}
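/// `cids` is interpolated directly into the SQL, so callers must pass an
/// already-formatted id list (e.g. as built with `ids_to_string()` in
/// `gather_revlog()`).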
pub(crate) fn get_revlog_entries_for_card_ids(&self, cids: String) -> Result<Vec<RevlogEntry>> {
self.db
.prepare_cached(&format!("{} where cid in {cids}", include_str!("get.sql"),))?
.query_and_then([], row_to_revlog_entry)?
.collect()
}
/// This includes entries from deleted cards.
pub(crate) fn get_all_revlog_entries(
&self,