bulk tag add/remove/update; canonify on note save

also remove the tag list updated hook - we'll need a better solution in
the future than having the library code call back into the GUI code
This commit is contained in:
Damien Elmes 2020-05-07 17:54:23 +10:00
parent 389b8a0536
commit 2413f286b1
17 changed files with 390 additions and 220 deletions

View file

@ -61,7 +61,6 @@ message BackendInput {
Empty abort_media_sync = 46; Empty abort_media_sync = 46;
Empty before_upload = 47; Empty before_upload = 47;
RegisterTagsIn register_tags = 48; RegisterTagsIn register_tags = 48;
string canonify_tags = 49;
Empty all_tags = 50; Empty all_tags = 50;
int32 get_changed_tags = 51; int32 get_changed_tags = 51;
string get_config_json = 52; string get_config_json = 52;
@ -93,6 +92,8 @@ message BackendInput {
FieldNamesForNotesIn field_names_for_notes = 78; FieldNamesForNotesIn field_names_for_notes = 78;
FindAndReplaceIn find_and_replace = 79; FindAndReplaceIn find_and_replace = 79;
AfterNoteUpdatesIn after_note_updates = 80; AfterNoteUpdatesIn after_note_updates = 80;
AddNoteTagsIn add_note_tags = 81;
UpdateNoteTagsIn update_note_tags = 82;
} }
} }
@ -134,7 +135,6 @@ message BackendOutput {
Empty remove_deck_config = 45; Empty remove_deck_config = 45;
Empty before_upload = 47; Empty before_upload = 47;
bool register_tags = 48; bool register_tags = 48;
CanonifyTagsOut canonify_tags = 49;
AllTagsOut all_tags = 50; AllTagsOut all_tags = 50;
GetChangedTagsOut get_changed_tags = 51; GetChangedTagsOut get_changed_tags = 51;
bytes get_config_json = 52; bytes get_config_json = 52;
@ -165,6 +165,8 @@ message BackendOutput {
FieldNamesForNotesOut field_names_for_notes = 78; FieldNamesForNotesOut field_names_for_notes = 78;
uint32 find_and_replace = 79; uint32 find_and_replace = 79;
Empty after_note_updates = 80; Empty after_note_updates = 80;
uint32 add_note_tags = 81;
uint32 update_note_tags = 82;
BackendError error = 2047; BackendError error = 2047;
} }
@ -491,11 +493,6 @@ message GetChangedTagsOut {
repeated string tags = 1; repeated string tags = 1;
} }
message CanonifyTagsOut {
string tags = 1;
bool tag_list_changed = 2;
}
message SetConfigJson { message SetConfigJson {
string key = 1; string key = 1;
oneof op { oneof op {
@ -740,3 +737,15 @@ message AfterNoteUpdatesIn {
bool mark_notes_modified = 2; bool mark_notes_modified = 2;
bool generate_cards = 3; bool generate_cards = 3;
} }
// Input for the add_note_tags backend command.
message AddNoteTagsIn {
// IDs of the notes the tags should be added to.
repeated int64 nids = 1;
// Space-separated list of tags to add.
string tags = 2;
}
// Input for the update_note_tags backend command.
message UpdateNoteTagsIn {
// IDs of the notes whose tags should be rewritten.
repeated int64 nids = 1;
// Space-separated list of tags (or regular expressions when `regex`
// is set) to look for.
string tags = 2;
// Text substituted for each match; an empty string removes the tag.
string replacement = 3;
// When false, `tags` are escaped and matched literally and
// `replacement` is inserted verbatim.
bool regex = 4;
}

View file

@ -12,7 +12,6 @@ import re
import stat import stat
import time import time
import traceback import traceback
import unicodedata
import weakref import weakref
from typing import Any, Iterable, List, Optional, Sequence, Tuple, Union from typing import Any, Iterable, List, Optional, Sequence, Tuple, Union
@ -810,15 +809,8 @@ select id from cards where odid > 0 and did in %s"""
self.db.execute( self.db.execute(
"update cards set odid=0, odue=0 where id in " + ids2str(ids) "update cards set odid=0, odue=0 where id in " + ids2str(ids)
) )
# notes with non-normalized tags # tags & field cache
cnt = self._normalize_tags() self.tags.register([], clear=True)
if cnt > 0:
syncable_problems.append(
self.tr(TR.DATABASE_CHECK_FIXED_NON_NORMALIZED_TAGS, count=cnt)
)
# tags
self.tags.registerNotes()
# field cache
for m in self.models.all(): for m in self.models.all():
self.after_note_updates( self.after_note_updates(
self.models.nids(m), mark_modified=False, generate_cards=False self.models.nids(m), mark_modified=False, generate_cards=False
@ -887,22 +879,6 @@ and type=0 and queue!=4""",
problems.extend(syncable_problems) problems.extend(syncable_problems)
return ("\n".join(problems), ok) return ("\n".join(problems), ok)
def _normalize_tags(self) -> int:
to_fix = []
for id, tags in self.db.execute("select id, tags from notes"):
norm = unicodedata.normalize("NFC", tags)
if not norm.strip():
norm = ""
elif not norm.startswith(" ") or not norm.endswith(" "):
norm = " " + norm + " "
if norm != tags:
to_fix.append((norm, self.usn(), intTime(), id))
if to_fix:
self.db.executemany(
"update notes set tags=?, usn=?, mod=? where id=?", to_fix
)
return len(to_fix)
def optimize(self) -> None: def optimize(self) -> None:
self.save(trx=False) self.save(trx=False)
self.db.execute("vacuum") self.db.execute("vacuum")

View file

@ -542,30 +542,6 @@ class _SyncStageDidChangeHook:
sync_stage_did_change = _SyncStageDidChangeHook() sync_stage_did_change = _SyncStageDidChangeHook()
class _TagListDidUpdateHook:
_hooks: List[Callable[[], None]] = []
def append(self, cb: Callable[[], None]) -> None:
"""()"""
self._hooks.append(cb)
def remove(self, cb: Callable[[], None]) -> None:
if cb in self._hooks:
self._hooks.remove(cb)
def __call__(self) -> None:
for hook in self._hooks:
try:
hook()
except:
# if the hook fails, remove it
self._hooks.remove(hook)
raise
tag_list_did_update = _TagListDidUpdateHook()
# @@AUTOGEN@@ # @@AUTOGEN@@
# Legacy hook handling # Legacy hook handling

View file

@ -181,7 +181,6 @@ class Anki2Importer(Importer):
"insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)", update "insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)", update
) )
self.dst.updateFieldCache(dirty) self.dst.updateFieldCache(dirty)
self.dst.tags.registerNotes(dirty)
# determine if note is a duplicate, and adjust mid and/or guid as required # determine if note is a duplicate, and adjust mid and/or guid as required
# returns true if note should be added # returns true if note should be added

View file

@ -71,7 +71,6 @@ acq_reps+ret_reps, lapses, card_type_id from cards"""
if "tags" not in note: if "tags" not in note:
note["tags"] = [] note["tags"] = []
note["tags"] += self.col.tags.split(tags) note["tags"] += self.col.tags.split(tags)
note["tags"] = self.col.tags.canonify(note["tags"])
# if it's a new card we can go with the defaults # if it's a new card we can go with the defaults
if row[3] == -1: if row[3] == -1:
continue continue

View file

@ -242,7 +242,6 @@ content in the text file to the correct fields."""
# note id for card updates later # note id for card updates later
for ord, c in list(n.cards.items()): for ord, c in list(n.cards.items()):
self._cards.append((id, ord, c)) self._cards.append((id, ord, c))
self.col.tags.register(n.tags)
return [ return [
id, id,
guid64(), guid64(),
@ -267,14 +266,11 @@ content in the text file to the correct fields."""
if not self.processFields(n, sflds): if not self.processFields(n, sflds):
return None return None
if self._tagsMapped: if self._tagsMapped:
self.col.tags.register(n.tags)
tags = self.col.tags.join(n.tags) tags = self.col.tags.join(n.tags)
return [intTime(), self.col.usn(), n.fieldsStr, tags, id, n.fieldsStr, tags] return [intTime(), self.col.usn(), n.fieldsStr, tags, id, n.fieldsStr, tags]
elif self.tagModified: elif self.tagModified:
tags = self.col.db.scalar("select tags from notes where id = ?", id) tags = self.col.db.scalar("select tags from notes where id = ?", id)
tagList = self.col.tags.split(tags) + self.tagModified.split() tagList = self.col.tags.split(tags) + self.tagModified.split()
tagList = self.col.tags.canonify(tagList)
self.col.tags.register(tagList)
tags = self.col.tags.join(tagList) tags = self.col.tags.join(tagList)
return [intTime(), self.col.usn(), n.fieldsStr, tags, id, n.fieldsStr] return [intTime(), self.col.usn(), n.fieldsStr, tags, id, n.fieldsStr]
else: else:

View file

@ -540,10 +540,6 @@ class RustBackend:
def all_tags(self) -> Iterable[TagUsnTuple]: def all_tags(self) -> Iterable[TagUsnTuple]:
return self._run_command(pb.BackendInput(all_tags=pb.Empty())).all_tags.tags return self._run_command(pb.BackendInput(all_tags=pb.Empty())).all_tags.tags
def canonify_tags(self, tags: str) -> Tuple[str, bool]:
out = self._run_command(pb.BackendInput(canonify_tags=tags)).canonify_tags
return (out.tags, out.tag_list_changed)
def register_tags(self, tags: str, usn: Optional[int], clear_first: bool) -> bool: def register_tags(self, tags: str, usn: Optional[int], clear_first: bool) -> bool:
if usn is None: if usn is None:
preserve_usn = False preserve_usn = False
@ -792,6 +788,22 @@ class RustBackend:
release_gil=True, release_gil=True,
) )
def add_note_tags(self, nids: List[int], tags: str) -> int:
    "Send an add_note_tags request for NIDS; TAGS is space-separated."
    request = pb.BackendInput(add_note_tags=pb.AddNoteTagsIn(nids=nids, tags=tags))
    response = self._run_command(request)
    return response.add_note_tags
def update_note_tags(
    self, nids: List[int], tags: str, replacement: str, regex: bool
) -> int:
    "Send an update_note_tags request for NIDS and return the backend's result."
    payload = pb.UpdateNoteTagsIn(
        nids=nids, tags=tags, replacement=replacement, regex=regex
    )
    response = self._run_command(pb.BackendInput(update_note_tags=payload))
    return response.update_note_tags
def translate_string_in( def translate_string_in(
key: TR, **kwargs: Union[str, int, float] key: TR, **kwargs: Union[str, int, float]

View file

@ -12,11 +12,10 @@ This module manages the tag cache and tags for notes.
from __future__ import annotations from __future__ import annotations
import re import re
from typing import Callable, Collection, List, Optional, Tuple from typing import Collection, List, Optional, Tuple
import anki # pylint: disable=unused-import import anki # pylint: disable=unused-import
from anki import hooks from anki.utils import ids2str
from anki.utils import ids2str, intTime
class TagManager: class TagManager:
@ -37,10 +36,7 @@ class TagManager:
def register( def register(
self, tags: Collection[str], usn: Optional[int] = None, clear=False self, tags: Collection[str], usn: Optional[int] = None, clear=False
) -> None: ) -> None:
"Given a list of tags, add any missing ones to tag registry." self.col.backend.register_tags(" ".join(tags), usn, clear)
changed = self.col.backend.register_tags(" ".join(tags), usn, clear)
if changed:
hooks.tag_list_did_update()
def registerNotes(self, nids: Optional[List[int]] = None) -> None: def registerNotes(self, nids: Optional[List[int]] = None) -> None:
"Add any missing tags from notes to the tags list." "Add any missing tags from notes to the tags list."
@ -76,40 +72,25 @@ class TagManager:
# Bulk addition/removal from notes # Bulk addition/removal from notes
############################################################# #############################################################
def bulk_add(self, nids: List[int], tags: str) -> int:
    """Add space-separated tags to the provided notes.

    Returns the number of notes that were changed."""
    backend = self.col.backend
    changed_count = backend.add_note_tags(nids, tags)
    return changed_count
def bulk_update(
    self, nids: List[int], tags: str, replacement: str, regex: bool
) -> int:
    """Replace space-separated tags on the provided notes.

    A tag replaced with an empty string is removed. Returns the number
    of notes that were changed."""
    backend = self.col.backend
    changed_count = backend.update_note_tags(nids, tags, replacement, regex)
    return changed_count
# legacy routines
def bulkAdd(self, ids: List[int], tags: str, add: bool = True) -> None: def bulkAdd(self, ids: List[int], tags: str, add: bool = True) -> None:
"Add tags in bulk. TAGS is space-separated." "Add tags in bulk. TAGS is space-separated."
newTags = self.split(tags)
if not newTags:
return
# cache tag names
if add: if add:
self.register(newTags) self.bulk_add(ids, tags)
# find notes missing the tags
fn: Callable[[str, str], str]
if add:
l = "tags not "
fn = self.addToStr
else: else:
l = "tags " self.bulk_update(ids, tags, "", False)
fn = self.remFromStr
lim = " or ".join(l + "like ?" for x in newTags)
res = self.col.db.all(
"select id, tags from notes where id in %s and (%s)" % (ids2str(ids), lim),
*["%% %s %%" % y.replace("*", "%") for x, y in enumerate(newTags)],
)
# update tags
def fix(row):
return [
fn(tags, row[1]),
intTime(),
self.col.usn(),
row[0],
]
self.col.db.executemany(
"update notes set tags=?,mod=?,usn=? where id = ?",
[fix(row) for row in res],
)
def bulkRem(self, ids: List[int], tags: str) -> None: def bulkRem(self, ids: List[int], tags: str) -> None:
self.bulkAdd(ids, tags, False) self.bulkAdd(ids, tags, False)
@ -157,13 +138,9 @@ class TagManager:
# List-based utilities # List-based utilities
########################################################################## ##########################################################################
# this is now a no-op - the tags are canonified when the note is saved
def canonify(self, tagList: List[str]) -> List[str]: def canonify(self, tagList: List[str]) -> List[str]:
"Strip duplicates, adjust case to match existing tags, and sort." return tagList
tag_str, changed = self.col.backend.canonify_tags(" ".join(tagList))
if changed:
hooks.tag_list_did_update()
return tag_str.split(" ")
def inList(self, tag: str, tags: List[str]) -> bool: def inList(self, tag: str, tags: List[str]) -> bool:
"True if TAG is in TAGS. Ignore case." "True if TAG is in TAGS. Ignore case."

View file

@ -51,7 +51,6 @@ hooks = [
return_type="bool", return_type="bool",
doc="Warning: this is called on a background thread.", doc="Warning: this is called on a background thread.",
), ),
Hook(name="tag_list_did_update"),
Hook( Hook(
name="field_filter", name="field_filter",
args=[ args=[

View file

@ -1888,7 +1888,6 @@ update cards set usn=?, mod=?, did=? where id in """
gui_hooks.editor_did_fire_typing_timer.append(self.refreshCurrentCard) gui_hooks.editor_did_fire_typing_timer.append(self.refreshCurrentCard)
gui_hooks.editor_did_load_note.append(self.onLoadNote) gui_hooks.editor_did_load_note.append(self.onLoadNote)
gui_hooks.editor_did_unfocus_field.append(self.on_unfocus_field) gui_hooks.editor_did_unfocus_field.append(self.on_unfocus_field)
hooks.tag_list_did_update.append(self.on_tag_list_update)
hooks.note_type_added.append(self.on_item_added) hooks.note_type_added.append(self.on_item_added)
hooks.deck_added.append(self.on_item_added) hooks.deck_added.append(self.on_item_added)
@ -1898,7 +1897,6 @@ update cards set usn=?, mod=?, did=? where id in """
gui_hooks.editor_did_fire_typing_timer.remove(self.refreshCurrentCard) gui_hooks.editor_did_fire_typing_timer.remove(self.refreshCurrentCard)
gui_hooks.editor_did_load_note.remove(self.onLoadNote) gui_hooks.editor_did_load_note.remove(self.onLoadNote)
gui_hooks.editor_did_unfocus_field.remove(self.on_unfocus_field) gui_hooks.editor_did_unfocus_field.remove(self.on_unfocus_field)
hooks.tag_list_did_update.remove(self.on_tag_list_update)
hooks.note_type_added.remove(self.on_item_added) hooks.note_type_added.remove(self.on_item_added)
hooks.deck_added.remove(self.on_item_added) hooks.deck_added.remove(self.on_item_added)

View file

@ -7,7 +7,6 @@ import itertools
import json import json
import mimetypes import mimetypes
import re import re
import unicodedata
import urllib.error import urllib.error
import urllib.parse import urllib.parse
import urllib.request import urllib.request
@ -569,9 +568,7 @@ class Editor:
def saveTags(self) -> None: def saveTags(self) -> None:
if not self.note: if not self.note:
return return
tagsTxt = unicodedata.normalize("NFC", self.tags.text()) self.note.tags = self.mw.col.tags.split(self.tags.text())
self.note.tags = self.mw.col.tags.canonify(self.mw.col.tags.split(tagsTxt))
self.tags.setText(self.mw.col.tags.join(self.note.tags).strip())
if not self.addMode: if not self.addMode:
self.note.flush() self.note.flush()
gui_hooks.editor_did_update_tags(self.note) gui_hooks.editor_did_update_tags(self.note)

View file

@ -1,6 +0,0 @@
database-check-fixed-non-normalized-tags = { $count ->
[one] Fixed tags for one note.
*[other] Fixed tags for {$count} notes.
}

View file

@ -289,7 +289,6 @@ impl Backend {
self.before_upload()?; self.before_upload()?;
OValue::BeforeUpload(pb::Empty {}) OValue::BeforeUpload(pb::Empty {})
} }
Value::CanonifyTags(input) => OValue::CanonifyTags(self.canonify_tags(input)?),
Value::AllTags(_) => OValue::AllTags(self.all_tags()?), Value::AllTags(_) => OValue::AllTags(self.all_tags()?),
Value::RegisterTags(input) => OValue::RegisterTags(self.register_tags(input)?), Value::RegisterTags(input) => OValue::RegisterTags(self.register_tags(input)?),
Value::GetChangedTags(usn) => OValue::GetChangedTags(self.get_changed_tags(usn)?), Value::GetChangedTags(usn) => OValue::GetChangedTags(self.get_changed_tags(usn)?),
@ -368,6 +367,8 @@ impl Backend {
Value::AfterNoteUpdates(input) => { Value::AfterNoteUpdates(input) => {
OValue::AfterNoteUpdates(self.after_note_updates(input)?) OValue::AfterNoteUpdates(self.after_note_updates(input)?)
} }
Value::AddNoteTags(input) => OValue::AddNoteTags(self.add_note_tags(input)?),
Value::UpdateNoteTags(input) => OValue::UpdateNoteTags(self.update_note_tags(input)?),
}) })
} }
@ -778,18 +779,6 @@ impl Backend {
self.with_col(|col| col.transact(None, |col| col.before_upload())) self.with_col(|col| col.transact(None, |col| col.before_upload()))
} }
fn canonify_tags(&self, tags: String) -> Result<pb::CanonifyTagsOut> {
self.with_col(|col| {
col.transact(None, |col| {
col.canonify_tags(&tags, col.usn()?)
.map(|(tags, added)| pb::CanonifyTagsOut {
tags,
tag_list_changed: added,
})
})
})
}
fn all_tags(&self) -> Result<pb::AllTagsOut> { fn all_tags(&self) -> Result<pb::AllTagsOut> {
let tags = self.with_col(|col| col.storage.all_tags())?; let tags = self.with_col(|col| col.storage.all_tags())?;
let tags: Vec<_> = tags let tags: Vec<_> = tags
@ -1092,16 +1081,17 @@ impl Backend {
Some(input.field_name) Some(input.field_name)
}; };
let repl = input.replacement; let repl = input.replacement;
self.with_col(|col| col.find_and_replace(nids, &search, &repl, field_name)) self.with_col(|col| {
col.find_and_replace(nids, &search, &repl, field_name)
.map(|cnt| cnt as u32)
})
} }
fn after_note_updates(&self, input: pb::AfterNoteUpdatesIn) -> Result<pb::Empty> { fn after_note_updates(&self, input: pb::AfterNoteUpdatesIn) -> Result<pb::Empty> {
self.with_col(|col| { self.with_col(|col| {
col.transact(None, |col| { col.transact(None, |col| {
let nids: Vec<_> = input.nids.into_iter().map(NoteID).collect();
col.after_note_updates( col.after_note_updates(
&nids, &to_nids(input.nids),
col.usn()?,
input.generate_cards, input.generate_cards,
input.mark_notes_modified, input.mark_notes_modified,
)?; )?;
@ -1109,6 +1099,29 @@ impl Backend {
}) })
}) })
} }
/// Backend entry point for `AddNoteTagsIn`: adds the space-separated
/// tags to the given notes and reports how many notes were changed.
fn add_note_tags(&self, input: pb::AddNoteTagsIn) -> Result<u32> {
    let pb::AddNoteTagsIn { nids, tags } = input;
    let nids = to_nids(nids);
    self.with_col(|col| {
        let changed = col.add_tags_for_notes(&nids, &tags)?;
        Ok(changed as u32)
    })
}
/// Backend entry point for `UpdateNoteTagsIn`: replaces matching tags on
/// the given notes and reports how many notes were changed.
fn update_note_tags(&self, input: pb::UpdateNoteTagsIn) -> Result<u32> {
    let pb::UpdateNoteTagsIn {
        nids,
        tags,
        replacement,
        regex,
    } = input;
    let nids = to_nids(nids);
    self.with_col(|col| {
        let changed = col.replace_tags_for_notes(&nids, &tags, &replacement, regex)?;
        Ok(changed as u32)
    })
}
}
fn to_nids(ids: Vec<i64>) -> Vec<NoteID> {
ids.into_iter().map(NoteID).collect()
} }
fn translate_arg_to_fluent_val(arg: &pb::TranslateArgValue) -> FluentValue { fn translate_arg_to_fluent_val(arg: &pb::TranslateArgValue) -> FluentValue {

View file

@ -4,12 +4,9 @@
use crate::{ use crate::{
collection::Collection, collection::Collection,
err::{AnkiError, Result}, err::{AnkiError, Result},
notes::NoteID, notes::{NoteID, TransformNoteOutput},
notetype::CardGenContext,
text::normalize_to_nfc, text::normalize_to_nfc,
types::Usn,
}; };
use itertools::Itertools;
use regex::Regex; use regex::Regex;
use std::borrow::Cow; use std::borrow::Cow;
@ -47,7 +44,7 @@ impl Collection {
search_re: &str, search_re: &str,
repl: &str, repl: &str,
field_name: Option<String>, field_name: Option<String>,
) -> Result<u32> { ) -> Result<usize> {
self.transact(None, |col| { self.transact(None, |col| {
let norm = col.normalize_note_text(); let norm = col.normalize_note_text();
let search = if norm { let search = if norm {
@ -56,60 +53,47 @@ impl Collection {
search_re.into() search_re.into()
}; };
let ctx = FindReplaceContext::new(nids, &search, repl, field_name)?; let ctx = FindReplaceContext::new(nids, &search, repl, field_name)?;
col.find_and_replace_inner(ctx, col.usn()?, norm) col.find_and_replace_inner(ctx)
}) })
} }
fn find_and_replace_inner( fn find_and_replace_inner(&mut self, ctx: FindReplaceContext) -> Result<usize> {
&mut self, let mut last_ntid = None;
ctx: FindReplaceContext, let mut field_ord = None;
usn: Usn, self.transform_notes(&ctx.nids, |note, nt| {
normalize_text: bool, if last_ntid != Some(nt.id) {
) -> Result<u32> { field_ord = ctx.field_name.as_ref().and_then(|n| nt.get_field_ord(n));
let mut total_changed = 0; last_ntid = Some(nt.id);
let nids_by_notetype = self.storage.note_ids_by_notetype(&ctx.nids)?; }
for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) {
let nt = self let mut changed = false;
.get_notetype(ntid)? match field_ord {
.ok_or_else(|| AnkiError::invalid_input("missing note type"))?; None => {
let genctx = CardGenContext::new(&nt, usn); // all fields
let field_ord = ctx.field_name.as_ref().and_then(|n| nt.get_field_ord(n)); for txt in &mut note.fields {
for (_, nid) in group { if let Cow::Owned(otxt) = ctx.replace_text(txt) {
let mut note = self.storage.get_note(nid)?.unwrap(); changed = true;
let mut changed = false; *txt = otxt;
match field_ord {
None => {
// all fields
for txt in &mut note.fields {
if let Cow::Owned(otxt) = ctx.replace_text(txt) {
changed = true;
*txt = otxt;
}
}
}
Some(ord) => {
// single field
if let Some(txt) = note.fields.get_mut(ord) {
if let Cow::Owned(otxt) = ctx.replace_text(txt) {
changed = true;
*txt = otxt;
}
} }
} }
} }
if changed { Some(ord) => {
self.update_note_inner_generating_cards( // single field
&genctx, if let Some(txt) = note.fields.get_mut(ord) {
&mut note, if let Cow::Owned(otxt) = ctx.replace_text(txt) {
true, changed = true;
normalize_text, *txt = otxt;
)?; }
total_changed += 1; }
} }
} }
}
Ok(total_changed) Ok(TransformNoteOutput {
changed,
generate_cards: true,
mark_modified: true,
})
})
} }
} }

View file

@ -14,12 +14,20 @@ use crate::{
}; };
use itertools::Itertools; use itertools::Itertools;
use num_integer::Integer; use num_integer::Integer;
use std::{collections::HashSet, convert::TryInto}; use regex::{Regex, Replacer};
use std::{borrow::Cow, collections::HashSet, convert::TryInto};
define_newtype!(NoteID, i64); define_newtype!(NoteID, i64);
// fixme: ensure nulls and x1f not in field contents // fixme: ensure nulls and x1f not in field contents
/// What a `transform_notes` callback reports back for a single note.
#[derive(Default)]
pub(crate) struct TransformNoteOutput {
/// Whether the callback modified the note; `transform_notes` skips
/// saving (and does not count) notes for which this is false.
pub changed: bool,
/// Selects the save path: `update_note_inner_generating_cards` when
/// true, `update_note_inner_without_cards` when false.
pub generate_cards: bool,
/// Forwarded to the save routine as its "mark modified" flag.
pub mark_modified: bool,
}
#[derive(Debug)] #[derive(Debug)]
pub struct Note { pub struct Note {
pub id: NoteID, pub id: NoteID,
@ -116,6 +124,17 @@ impl Note {
}) })
.collect() .collect()
} }
/// Apply `re` to each of the note's tags in turn, replacing every match
/// with `repl`. Returns true if the regex matched in at least one tag.
pub(crate) fn replace_tags<T: Replacer>(&mut self, re: &Regex, mut repl: T) -> bool {
    let mut any_replaced = false;
    self.tags.iter_mut().for_each(|tag| {
        // replace_all() only hands back an owned string when it matched
        if let Cow::Owned(updated) = re.replace_all(tag, repl.by_ref()) {
            *tag = updated;
            any_replaced = true;
        }
    });
    any_replaced
}
} }
impl From<Note> for pb::Note { impl From<Note> for pb::Note {
@ -174,13 +193,10 @@ fn anki_base91(mut n: u64) -> String {
impl Collection { impl Collection {
fn canonify_note_tags(&self, note: &mut Note, usn: Usn) -> Result<()> { fn canonify_note_tags(&self, note: &mut Note, usn: Usn) -> Result<()> {
// fixme: avoid the excess split/join if !note.tags.is_empty() {
note.tags = self let tags = std::mem::replace(&mut note.tags, vec![]);
.canonify_tags(&note.tags.join(" "), usn)? note.tags = self.canonify_tags(tags, usn)?.0;
.0 }
.split(' ')
.map(Into::into)
.collect();
Ok(()) Ok(())
} }
@ -268,38 +284,69 @@ impl Collection {
pub(crate) fn after_note_updates( pub(crate) fn after_note_updates(
&mut self, &mut self,
nids: &[NoteID], nids: &[NoteID],
usn: Usn,
generate_cards: bool, generate_cards: bool,
mark_notes_modified: bool, mark_notes_modified: bool,
) -> Result<()> { ) -> Result<()> {
self.transform_notes(nids, |_note, _nt| {
Ok(TransformNoteOutput {
changed: true,
generate_cards,
mark_modified: mark_notes_modified,
})
})
.map(|_| ())
}
pub(crate) fn transform_notes<F>(
&mut self,
nids: &[NoteID],
mut transformer: F,
) -> Result<usize>
where
F: FnMut(&mut Note, &NoteType) -> Result<TransformNoteOutput>,
{
let nids_by_notetype = self.storage.note_ids_by_notetype(nids)?; let nids_by_notetype = self.storage.note_ids_by_notetype(nids)?;
let norm = self.normalize_note_text(); let norm = self.normalize_note_text();
let mut changed_notes = 0;
let usn = self.usn()?;
for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) { for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) {
let nt = self let nt = self
.get_notetype(ntid)? .get_notetype(ntid)?
.ok_or_else(|| AnkiError::invalid_input("missing note type"))?; .ok_or_else(|| AnkiError::invalid_input("missing note type"))?;
let genctx = CardGenContext::new(&nt, usn);
let mut genctx = None;
for (_, nid) in group { for (_, nid) in group {
// grab the note and transform it
let mut note = self.storage.get_note(nid)?.unwrap(); let mut note = self.storage.get_note(nid)?.unwrap();
if generate_cards { let out = transformer(&mut note, &nt)?;
if !out.changed {
continue;
}
if out.generate_cards {
let ctx = genctx.get_or_insert_with(|| CardGenContext::new(&nt, usn));
self.update_note_inner_generating_cards( self.update_note_inner_generating_cards(
&genctx, &ctx,
&mut note, &mut note,
mark_notes_modified, out.mark_modified,
norm, norm,
)?; )?;
} else { } else {
self.update_note_inner_without_cards( self.update_note_inner_without_cards(
&mut note, &mut note,
&genctx.notetype, &nt,
usn, usn,
mark_notes_modified, out.mark_modified,
norm, norm,
)?; )?;
} }
changed_notes += 1;
} }
} }
Ok(())
Ok(changed_notes)
} }
} }

View file

@ -301,4 +301,13 @@ impl SqliteStorage {
.execute(&[TimestampMillis::now()])?; .execute(&[TimestampMillis::now()])?;
Ok(()) Ok(())
} }
//////////////////////////////////////////
/// Test helper: run `sql` without parameters and return the first
/// column of the first row.
#[cfg(test)]
pub(crate) fn db_scalar<T: rusqlite::types::FromSql>(&self, sql: &str) -> Result<T> {
    let first_column = self.db.query_row(sql, NO_PARAMS, |row| row.get(0));
    first_column.map_err(Into::into)
}
} }

View file

@ -1,9 +1,13 @@
// Copyright: Ankitects Pty Ltd and contributors // Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::collection::Collection; use crate::{
use crate::err::Result; collection::Collection,
use crate::types::Usn; err::{AnkiError, Result},
notes::{NoteID, TransformNoteOutput},
{text::normalize_to_nfc, types::Usn},
};
use regex::{NoExpand, Regex, Replacer};
use std::{borrow::Cow, collections::HashSet}; use std::{borrow::Cow, collections::HashSet};
use unicase::UniCase; use unicase::UniCase;
@ -21,30 +25,43 @@ pub(crate) fn join_tags(tags: &[String]) -> String {
} }
impl Collection { impl Collection {
/// Given a space-separated list of tags, fix case, ordering and duplicates. /// Given a list of tags, fix case, ordering and duplicates.
/// Returns true if any new tags were added. /// Returns true if any new tags were added.
pub(crate) fn canonify_tags(&self, tags: &str, usn: Usn) -> Result<(String, bool)> { pub(crate) fn canonify_tags(&self, tags: Vec<String>, usn: Usn) -> Result<(Vec<String>, bool)> {
let mut tagset = HashSet::new(); let mut seen = HashSet::new();
let mut added = false; let mut added = false;
for tag in split_tags(tags) { let tags: Vec<_> = tags
.iter()
.flat_map(|t| split_tags(t))
.map(|s| normalize_to_nfc(&s))
.collect();
for tag in &tags {
if tag.trim().is_empty() {
continue;
}
let tag = self.register_tag(tag, usn)?; let tag = self.register_tag(tag, usn)?;
if matches!(tag, Cow::Borrowed(_)) { if matches!(tag, Cow::Borrowed(_)) {
added = true; added = true;
} }
tagset.insert(UniCase::new(tag)); seen.insert(UniCase::new(tag));
} }
if tagset.is_empty() { // exit early if no non-empty tags
return Ok(("".into(), added)); if seen.is_empty() {
return Ok((vec![], added));
} }
let mut tags = tagset.into_iter().collect::<Vec<_>>(); // return the sorted, canonified tags
let mut tags = seen.into_iter().collect::<Vec<_>>();
tags.sort_unstable(); tags.sort_unstable();
let tags: Vec<_> = tags
.into_iter()
.map(|s| s.into_inner().to_string())
.collect();
let tags: Vec<_> = tags.into_iter().map(|s| s.into_inner()).collect(); Ok((tags, added))
Ok((format!(" {} ", tags.join(" ")), added))
} }
pub(crate) fn register_tag<'a>(&self, tag: &'a str, usn: Usn) -> Result<Cow<'a, str>> { pub(crate) fn register_tag<'a>(&self, tag: &'a str, usn: Usn) -> Result<Cow<'a, str>> {
@ -69,4 +86,172 @@ impl Collection {
} }
Ok(changed) Ok(changed)
} }
/// Run every regex in `tags` over the tags of each note in `nids`,
/// substituting `repl` for matches, inside a single transaction.
///
/// A note is saved (marked modified, without card generation) only when
/// at least one regex matched one of its tags. Returns the count
/// reported by `transform_notes`.
fn replace_tags_for_notes_inner<R: Replacer>(
    &mut self,
    nids: &[NoteID],
    tags: &[Regex],
    mut repl: R,
) -> Result<usize> {
    self.transact(None, |col| {
        col.transform_notes(nids, |note, _nt| {
            // fold instead of any(): every regex must run even after a hit
            let changed = tags
                .iter()
                .fold(false, |hit, re| note.replace_tags(re, repl.by_ref()) || hit);
            Ok(TransformNoteOutput {
                changed,
                generate_cards: false,
                mark_modified: true,
            })
        })
    })
}
/// Replace whole tags on the given notes, saving any modified notes.
///
/// `tags` is a space-separated list. When `regex` is false each entry is
/// escaped and matched literally, and `repl` is inserted verbatim; when
/// true each entry is used as a regular expression and `repl` may refer
/// to capture groups. Matching is case-insensitive.
///
/// Every pattern is anchored to the entire tag, so replacing `test` does
/// not rewrite part of an unrelated tag such as `testing`. An empty
/// `repl` leaves an empty tag behind, which is dropped when the note is
/// saved.
///
/// Returns the number of notes that were changed, or an invalid-input
/// error if one of the entries is not a valid regular expression.
pub fn replace_tags_for_notes(
    &mut self,
    nids: &[NoteID],
    tags: &str,
    repl: &str,
    regex: bool,
) -> Result<usize> {
    // generate regexps
    let tags = split_tags(tags)
        .map(|tag| {
            let pattern = if regex {
                tag.into()
            } else {
                regex::escape(tag)
            };
            // the non-capturing group keeps a top-level `|` in a
            // user-supplied pattern inside the anchors without shifting
            // the numbering of the user's capture groups
            Regex::new(&format!("(?i)^(?:{})$", pattern))
                .map_err(|_| AnkiError::invalid_input("invalid regex"))
        })
        .collect::<Result<Vec<Regex>>>()?;

    if !regex {
        // literal mode: `$` in the replacement must not be expanded
        self.replace_tags_for_notes_inner(nids, &tags, NoExpand(repl))
    } else {
        self.replace_tags_for_notes_inner(nids, &tags, repl)
    }
}
/// Add the space-separated `tags` to each of the given notes.
///
/// A note is only touched when at least one of the requested tags is
/// missing from it; the comparison is case-insensitive and against the
/// whole tag. Touched notes are marked modified and saved without card
/// generation.
///
/// Returns the number of notes that were changed. If `tags` contains no
/// tags, nothing is modified and 0 is returned.
pub fn add_tags_for_notes(&mut self, nids: &[NoteID], tags: &str) -> Result<usize> {
    let tags: Vec<_> = split_tags(tags).collect();
    if tags.is_empty() {
        // nothing to add; avoid flagging tagless notes as changed
        return Ok(0);
    }

    // one anchored, case-insensitive pattern per requested tag, so that
    // "foo" is not considered present just because "foobar" is
    let matcher = regex::RegexSet::new(
        tags.iter()
            .map(|s| format!("(?i)^{}$", regex::escape(s))),
    )
    .map_err(|_| AnkiError::invalid_input("invalid regex"))?;

    self.transact(None, |col| {
        col.transform_notes(nids, |note, _nt| {
            // track which of the requested tags the note already has,
            // rather than merely counting matching note tags
            let mut found = vec![false; tags.len()];
            for tag in &note.tags {
                for idx in matcher.matches(tag) {
                    found[idx] = true;
                }
            }
            let need_to_add = found.iter().any(|&seen| !seen);

            if need_to_add {
                // duplicates of existing tags are removed when the note
                // is saved
                note.tags.extend(tags.iter().map(|&s| s.to_string()))
            }

            Ok(TransformNoteOutput {
                changed: need_to_add,
                generate_cards: false,
                mark_modified: true,
            })
        })
    })
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::{collection::open_test_collection, decks::DeckID};
/// Checks that saving a note canonifies its tags: duplicates collapse,
/// existing case is reused, text is NFC-normalized, and tags containing
/// spaces are split.
#[test]
fn tags() -> Result<()> {
    let mut col = open_test_collection();
    let nt = col.get_notetype_by_name("Basic")?.unwrap();
    let mut note = nt.new_note();
    col.add_note(&mut note, DeckID(1))?;

    // a note without tags is stored with an empty tag string
    let tags: String = col.storage.db_scalar("select tags from notes")?;
    assert_eq!(tags, "");

    // first instance wins in case of duplicates
    note.tags = vec!["foo".into(), "FOO".into()];
    col.update_note(&mut note)?;
    assert_eq!(&note.tags, &["foo"]);
    let tags: String = col.storage.db_scalar("select tags from notes")?;
    assert_eq!(tags, " foo ");

    // existing case is used if in DB
    note.tags = vec!["FOO".into()];
    col.update_note(&mut note)?;
    assert_eq!(&note.tags, &["foo"]);
    // re-read the stored value; asserting on the previous read would
    // pass regardless of what this update wrote
    let tags: String = col.storage.db_scalar("select tags from notes")?;
    assert_eq!(tags, " foo ");

    // tags are normalized to nfc
    note.tags = vec!["\u{fa47}".into()];
    col.update_note(&mut note)?;
    assert_eq!(&note.tags, &["\u{6f22}"]);

    // if code incorrectly adds a space to a tag, it gets split
    note.tags = vec!["one two".into()];
    col.update_note(&mut note)?;
    assert_eq!(&note.tags, &["one", "two"]);

    Ok(())
}
// Exercises bulk tag replacement and addition against a single note.
// Each step relies on the tags left behind by the previous one.
#[test]
fn bulk() -> Result<()> {
let mut col = open_test_collection();
let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note();
note.tags.push("test".into());
col.add_note(&mut note, DeckID(1))?;
// literal replacement: "foo" is absent, "test" becomes "bar"
col.replace_tags_for_notes(&[note.id], "foo test", "bar", false)?;
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(note.tags[0], "bar");
// with regex disabled, "b.r" is taken literally and matches nothing
col.replace_tags_for_notes(&[note.id], "b.r", "baz", false)?;
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(note.tags[0], "bar");
// with regex enabled, "b.r" matches "bar"
col.replace_tags_for_notes(&[note.id], "b.r", "baz", true)?;
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(note.tags[0], "baz");
// adding tags reports one changed note; tags are stored sorted
let cnt = col.add_tags_for_notes(&[note.id], "cee aye")?;
assert_eq!(cnt, 1);
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(&note.tags, &["aye", "baz", "cee"]);
// if all tags already on note, it doesn't get updated
let cnt = col.add_tags_for_notes(&[note.id], "cee aye")?;
assert_eq!(cnt, 0);
// empty replacement deletes tag
col.replace_tags_for_notes(&[note.id], "b.* .*ye", "", true)?;
let note = col.storage.get_note(note.id)?.unwrap();
assert_eq!(&note.tags, &["cee"]);
Ok(())
}
} }