diff --git a/proto/backend.proto b/proto/backend.proto index 32ac11817..96fea75e3 100644 --- a/proto/backend.proto +++ b/proto/backend.proto @@ -61,7 +61,6 @@ message BackendInput { Empty abort_media_sync = 46; Empty before_upload = 47; RegisterTagsIn register_tags = 48; - string canonify_tags = 49; Empty all_tags = 50; int32 get_changed_tags = 51; string get_config_json = 52; @@ -93,6 +92,8 @@ message BackendInput { FieldNamesForNotesIn field_names_for_notes = 78; FindAndReplaceIn find_and_replace = 79; AfterNoteUpdatesIn after_note_updates = 80; + AddNoteTagsIn add_note_tags = 81; + UpdateNoteTagsIn update_note_tags = 82; } } @@ -134,7 +135,6 @@ message BackendOutput { Empty remove_deck_config = 45; Empty before_upload = 47; bool register_tags = 48; - CanonifyTagsOut canonify_tags = 49; AllTagsOut all_tags = 50; GetChangedTagsOut get_changed_tags = 51; bytes get_config_json = 52; @@ -165,6 +165,8 @@ message BackendOutput { FieldNamesForNotesOut field_names_for_notes = 78; uint32 find_and_replace = 79; Empty after_note_updates = 80; + uint32 add_note_tags = 81; + uint32 update_note_tags = 82; BackendError error = 2047; } @@ -491,11 +493,6 @@ message GetChangedTagsOut { repeated string tags = 1; } -message CanonifyTagsOut { - string tags = 1; - bool tag_list_changed = 2; -} - message SetConfigJson { string key = 1; oneof op { @@ -740,3 +737,15 @@ message AfterNoteUpdatesIn { bool mark_notes_modified = 2; bool generate_cards = 3; } + +message AddNoteTagsIn { + repeated int64 nids = 1; + string tags = 2; +} + +message UpdateNoteTagsIn { + repeated int64 nids = 1; + string tags = 2; + string replacement = 3; + bool regex = 4; +} diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index b9df63b1a..f5030f112 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -12,7 +12,6 @@ import re import stat import time import traceback -import unicodedata import weakref from typing import Any, Iterable, List, Optional, Sequence, Tuple, Union @@ -810,15 +809,8 @@ select id from cards where odid > 0 and did in %s""" self.db.execute( "update cards set odid=0, odue=0 where id in " + ids2str(ids) ) - # notes with non-normalized tags - cnt = self._normalize_tags() - if cnt > 0: - syncable_problems.append( - self.tr(TR.DATABASE_CHECK_FIXED_NON_NORMALIZED_TAGS, count=cnt) - ) - # tags - self.tags.registerNotes() - # field cache + # tags & field cache + self.tags.register([], clear=True) for m in self.models.all(): self.after_note_updates( self.models.nids(m), mark_modified=False, generate_cards=False @@ -887,22 +879,6 @@ and type=0 and queue!=4""", problems.extend(syncable_problems) return ("\n".join(problems), ok) - def _normalize_tags(self) -> int: - to_fix = [] - for id, tags in self.db.execute("select id, tags from notes"): - norm = unicodedata.normalize("NFC", tags) - if not norm.strip(): - norm = "" - elif not norm.startswith(" ") or not norm.endswith(" "): - norm = " " + norm + " " - if norm != tags: - to_fix.append((norm, self.usn(), intTime(), id)) - if to_fix: - self.db.executemany( - "update notes set tags=?, usn=?, mod=? where id=?", to_fix - ) - return len(to_fix) - def optimize(self) -> None: self.save(trx=False) self.db.execute("vacuum") diff --git a/pylib/anki/hooks.py b/pylib/anki/hooks.py index 289d0c07b..00bcf1ee4 100644 --- a/pylib/anki/hooks.py +++ b/pylib/anki/hooks.py @@ -542,30 +542,6 @@ class _SyncStageDidChangeHook: sync_stage_did_change = _SyncStageDidChangeHook() - - -class _TagListDidUpdateHook: - _hooks: List[Callable[[], None]] = [] - - def append(self, cb: Callable[[], None]) -> None: - """()""" - self._hooks.append(cb) - - def remove(self, cb: Callable[[], None]) -> None: - if cb in self._hooks: - self._hooks.remove(cb) - - def __call__(self) -> None: - for hook in self._hooks: - try: - hook() - except: - # if the hook fails, remove it - self._hooks.remove(hook) - raise - - -tag_list_did_update = _TagListDidUpdateHook() # @@AUTOGEN@@ # Legacy hook handling diff --git a/pylib/anki/importing/anki2.py b/pylib/anki/importing/anki2.py index 296b3b2c9..d01e351ea 100644 --- a/pylib/anki/importing/anki2.py +++ b/pylib/anki/importing/anki2.py @@ -181,7 +181,6 @@ class Anki2Importer(Importer): "insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)", update ) self.dst.updateFieldCache(dirty) - self.dst.tags.registerNotes(dirty) # determine if note is a duplicate, and adjust mid and/or guid as required # returns true if note should be added diff --git a/pylib/anki/importing/mnemo.py b/pylib/anki/importing/mnemo.py index dd0984923..3ae882ca3 100644 --- a/pylib/anki/importing/mnemo.py +++ b/pylib/anki/importing/mnemo.py @@ -71,7 +71,6 @@ acq_reps+ret_reps, lapses, card_type_id from cards""" if "tags" not in note: note["tags"] = [] note["tags"] += self.col.tags.split(tags) - note["tags"] = self.col.tags.canonify(note["tags"]) # if it's a new card we can go with the defaults if row[3] == -1: continue diff --git a/pylib/anki/importing/noteimp.py b/pylib/anki/importing/noteimp.py index 158963cae..3dd68e927 100644 --- a/pylib/anki/importing/noteimp.py +++ b/pylib/anki/importing/noteimp.py @@ -242,7 +242,6 @@ content in the text file to the correct fields.""" # note id for card updates later for ord, c in list(n.cards.items()): self._cards.append((id, ord, c)) - self.col.tags.register(n.tags) return [ id, guid64(), @@ -267,14 +266,11 @@ content in the text file to the correct fields.""" if not self.processFields(n, sflds): return None if self._tagsMapped: - self.col.tags.register(n.tags) tags = self.col.tags.join(n.tags) return [intTime(), self.col.usn(), n.fieldsStr, tags, id, n.fieldsStr, tags] elif self.tagModified: tags = self.col.db.scalar("select tags from notes where id = ?", id) tagList = self.col.tags.split(tags) + self.tagModified.split() - tagList = self.col.tags.canonify(tagList) - self.col.tags.register(tagList) tags = self.col.tags.join(tagList) return [intTime(), self.col.usn(), n.fieldsStr, tags, id, n.fieldsStr] else: diff --git a/pylib/anki/rsbackend.py b/pylib/anki/rsbackend.py index ff397069b..10ecc7419 100644 --- a/pylib/anki/rsbackend.py +++ b/pylib/anki/rsbackend.py @@ -540,10 +540,6 @@ class RustBackend: def all_tags(self) -> Iterable[TagUsnTuple]: return self._run_command(pb.BackendInput(all_tags=pb.Empty())).all_tags.tags - def canonify_tags(self, tags: str) -> Tuple[str, bool]: - out = self._run_command(pb.BackendInput(canonify_tags=tags)).canonify_tags - return (out.tags, out.tag_list_changed) - def register_tags(self, tags: str, usn: Optional[int], clear_first: bool) -> bool: if usn is None: preserve_usn = False @@ -792,6 +788,22 @@ class RustBackend: release_gil=True, ) + def add_note_tags(self, nids: List[int], tags: str) -> int: + return self._run_command( + pb.BackendInput(add_note_tags=pb.AddNoteTagsIn(nids=nids, tags=tags)) + ).add_note_tags + + def update_note_tags( + self, nids: List[int], tags: str, replacement: str, regex: bool + ) -> int: + return self._run_command( + pb.BackendInput( + update_note_tags=pb.UpdateNoteTagsIn( + nids=nids, tags=tags, replacement=replacement, regex=regex + ) + ) + ).update_note_tags + def translate_string_in( key: TR, **kwargs: Union[str, int, float] diff --git a/pylib/anki/tags.py b/pylib/anki/tags.py index e9beff0dc..db3e3daba 100644 --- a/pylib/anki/tags.py +++ b/pylib/anki/tags.py @@ -12,11 +12,10 @@ This module manages the tag cache and tags for notes. from __future__ import annotations import re -from typing import Callable, Collection, List, Optional, Tuple +from typing import Collection, List, Optional, Tuple import anki # pylint: disable=unused-import -from anki import hooks -from anki.utils import ids2str, intTime +from anki.utils import ids2str class TagManager: @@ -37,10 +36,7 @@ class TagManager: def register( self, tags: Collection[str], usn: Optional[int] = None, clear=False ) -> None: - "Given a list of tags, add any missing ones to tag registry." - changed = self.col.backend.register_tags(" ".join(tags), usn, clear) - if changed: - hooks.tag_list_did_update() + self.col.backend.register_tags(" ".join(tags), usn, clear) def registerNotes(self, nids: Optional[List[int]] = None) -> None: "Add any missing tags from notes to the tags list." @@ -76,40 +72,25 @@ class TagManager: # Bulk addition/removal from notes ############################################################# + def bulk_add(self, nids: List[int], tags: str) -> int: + """Add space-separate tags to provided notes, returning changed count.""" + return self.col.backend.add_note_tags(nids, tags) + + def bulk_update( + self, nids: List[int], tags: str, replacement: str, regex: bool + ) -> int: + """Replace space-separated tags, returning changed count. + Tags replaced with an empty string will be removed.""" + return self.col.backend.update_note_tags(nids, tags, replacement, regex) + + # legacy routines + def bulkAdd(self, ids: List[int], tags: str, add: bool = True) -> None: "Add tags in bulk. TAGS is space-separated." - newTags = self.split(tags) - if not newTags: - return - # cache tag names if add: - self.register(newTags) - # find notes missing the tags - fn: Callable[[str, str], str] - if add: - l = "tags not " - fn = self.addToStr + self.bulk_add(ids, tags) else: - l = "tags " - fn = self.remFromStr - lim = " or ".join(l + "like ?" for x in newTags) - res = self.col.db.all( - "select id, tags from notes where id in %s and (%s)" % (ids2str(ids), lim), - *["%% %s %%" % y.replace("*", "%") for x, y in enumerate(newTags)], - ) - # update tags - def fix(row): - return [ - fn(tags, row[1]), - intTime(), - self.col.usn(), - row[0], - ] - - self.col.db.executemany( - "update notes set tags=?,mod=?,usn=? where id = ?", - [fix(row) for row in res], - ) + self.bulk_update(ids, tags, "", False) def bulkRem(self, ids: List[int], tags: str) -> None: self.bulkAdd(ids, tags, False) @@ -157,13 +138,9 @@ class TagManager: # List-based utilities ########################################################################## + # this is now a no-op - the tags are canonified when the note is saved def canonify(self, tagList: List[str]) -> List[str]: - "Strip duplicates, adjust case to match existing tags, and sort." - tag_str, changed = self.col.backend.canonify_tags(" ".join(tagList)) - if changed: - hooks.tag_list_did_update() - - return tag_str.split(" ") + return tagList def inList(self, tag: str, tags: List[str]) -> bool: "True if TAG is in TAGS. Ignore case." diff --git a/pylib/tools/genhooks.py b/pylib/tools/genhooks.py index f15436168..655a6f743 100644 --- a/pylib/tools/genhooks.py +++ b/pylib/tools/genhooks.py @@ -51,7 +51,6 @@ hooks = [ return_type="bool", doc="Warning: this is called on a background thread.", ), - Hook(name="tag_list_did_update"), Hook( name="field_filter", args=[ diff --git a/qt/aqt/browser.py b/qt/aqt/browser.py index 5072b128a..e22f04092 100644 --- a/qt/aqt/browser.py +++ b/qt/aqt/browser.py @@ -1888,7 +1888,6 @@ update cards set usn=?, mod=?, did=? where id in """ gui_hooks.editor_did_fire_typing_timer.append(self.refreshCurrentCard) gui_hooks.editor_did_load_note.append(self.onLoadNote) gui_hooks.editor_did_unfocus_field.append(self.on_unfocus_field) - hooks.tag_list_did_update.append(self.on_tag_list_update) hooks.note_type_added.append(self.on_item_added) hooks.deck_added.append(self.on_item_added) @@ -1898,7 +1897,6 @@ update cards set usn=?, mod=?, did=? where id in """ gui_hooks.editor_did_fire_typing_timer.remove(self.refreshCurrentCard) gui_hooks.editor_did_load_note.remove(self.onLoadNote) gui_hooks.editor_did_unfocus_field.remove(self.on_unfocus_field) - hooks.tag_list_did_update.remove(self.on_tag_list_update) hooks.note_type_added.remove(self.on_item_added) hooks.deck_added.remove(self.on_item_added) diff --git a/qt/aqt/editor.py b/qt/aqt/editor.py index b16ed7e33..ef4c26047 100644 --- a/qt/aqt/editor.py +++ b/qt/aqt/editor.py @@ -7,7 +7,6 @@ import itertools import json import mimetypes import re -import unicodedata import urllib.error import urllib.parse import urllib.request @@ -569,9 +568,7 @@ class Editor: def saveTags(self) -> None: if not self.note: return - tagsTxt = unicodedata.normalize("NFC", self.tags.text()) - self.note.tags = self.mw.col.tags.canonify(self.mw.col.tags.split(tagsTxt)) - self.tags.setText(self.mw.col.tags.join(self.note.tags).strip()) + self.note.tags = self.mw.col.tags.split(self.tags.text()) if not self.addMode: self.note.flush() gui_hooks.editor_did_update_tags(self.note) diff --git a/rslib/ftl/database-check.ftl b/rslib/ftl/database-check.ftl deleted file mode 100644 index 320dd92e3..000000000 --- a/rslib/ftl/database-check.ftl +++ /dev/null @@ -1,6 +0,0 @@ -database-check-fixed-non-normalized-tags = { $count -> - [one] Fixed tags for one note. - *[other] Fixed tags for {$count} notes. - } - - diff --git a/rslib/src/backend/mod.rs b/rslib/src/backend/mod.rs index b4952efaf..4011c0331 100644 --- a/rslib/src/backend/mod.rs +++ b/rslib/src/backend/mod.rs @@ -289,7 +289,6 @@ impl Backend { self.before_upload()?; OValue::BeforeUpload(pb::Empty {}) } - Value::CanonifyTags(input) => OValue::CanonifyTags(self.canonify_tags(input)?), Value::AllTags(_) => OValue::AllTags(self.all_tags()?), Value::RegisterTags(input) => OValue::RegisterTags(self.register_tags(input)?), Value::GetChangedTags(usn) => OValue::GetChangedTags(self.get_changed_tags(usn)?), @@ -368,6 +367,8 @@ impl Backend { Value::AfterNoteUpdates(input) => { OValue::AfterNoteUpdates(self.after_note_updates(input)?) } + Value::AddNoteTags(input) => OValue::AddNoteTags(self.add_note_tags(input)?), + Value::UpdateNoteTags(input) => OValue::UpdateNoteTags(self.update_note_tags(input)?), }) } @@ -778,18 +779,6 @@ impl Backend { self.with_col(|col| col.transact(None, |col| col.before_upload())) } - fn canonify_tags(&self, tags: String) -> Result { - self.with_col(|col| { - col.transact(None, |col| { - col.canonify_tags(&tags, col.usn()?) - .map(|(tags, added)| pb::CanonifyTagsOut { - tags, - tag_list_changed: added, - }) - }) - }) - } - fn all_tags(&self) -> Result { let tags = self.with_col(|col| col.storage.all_tags())?; let tags: Vec<_> = tags @@ -1092,16 +1081,17 @@ impl Backend { Some(input.field_name) }; let repl = input.replacement; - self.with_col(|col| col.find_and_replace(nids, &search, &repl, field_name)) + self.with_col(|col| { + col.find_and_replace(nids, &search, &repl, field_name) + .map(|cnt| cnt as u32) + }) } fn after_note_updates(&self, input: pb::AfterNoteUpdatesIn) -> Result { self.with_col(|col| { col.transact(None, |col| { - let nids: Vec<_> = input.nids.into_iter().map(NoteID).collect(); col.after_note_updates( - &nids, - col.usn()?, + &to_nids(input.nids), input.generate_cards, input.mark_notes_modified, )?; @@ -1109,6 +1099,29 @@ impl Backend { }) }) } + + fn add_note_tags(&self, input: pb::AddNoteTagsIn) -> Result { + self.with_col(|col| { + col.add_tags_for_notes(&to_nids(input.nids), &input.tags) + .map(|n| n as u32) + }) + } + + fn update_note_tags(&self, input: pb::UpdateNoteTagsIn) -> Result { + self.with_col(|col| { + col.replace_tags_for_notes( + &to_nids(input.nids), + &input.tags, + &input.replacement, + input.regex, + ) + .map(|n| n as u32) + }) + } +} + +fn to_nids(ids: Vec) -> Vec { + ids.into_iter().map(NoteID).collect() } fn translate_arg_to_fluent_val(arg: &pb::TranslateArgValue) -> FluentValue { diff --git a/rslib/src/findreplace.rs b/rslib/src/findreplace.rs index 099df8e17..52b8bde65 100644 --- a/rslib/src/findreplace.rs +++ b/rslib/src/findreplace.rs @@ -4,12 +4,9 @@ use crate::{ collection::Collection, err::{AnkiError, Result}, - notes::NoteID, - notetype::CardGenContext, + notes::{NoteID, TransformNoteOutput}, text::normalize_to_nfc, - types::Usn, }; -use itertools::Itertools; use regex::Regex; use std::borrow::Cow; @@ -47,7 +44,7 @@ impl Collection { search_re: &str, repl: &str, field_name: Option, - ) -> Result { + ) -> Result { self.transact(None, |col| { let norm = col.normalize_note_text(); let search = if norm { @@ -56,60 +53,47 @@ impl Collection { search_re.into() }; let ctx = FindReplaceContext::new(nids, &search, repl, field_name)?; - col.find_and_replace_inner(ctx, col.usn()?, norm) + col.find_and_replace_inner(ctx) }) } - fn find_and_replace_inner( - &mut self, - ctx: FindReplaceContext, - usn: Usn, - normalize_text: bool, - ) -> Result { - let mut total_changed = 0; - let nids_by_notetype = self.storage.note_ids_by_notetype(&ctx.nids)?; - for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) { - let nt = self - .get_notetype(ntid)? - .ok_or_else(|| AnkiError::invalid_input("missing note type"))?; - let genctx = CardGenContext::new(&nt, usn); - let field_ord = ctx.field_name.as_ref().and_then(|n| nt.get_field_ord(n)); - for (_, nid) in group { - let mut note = self.storage.get_note(nid)?.unwrap(); - let mut changed = false; - match field_ord { - None => { - // all fields - for txt in &mut note.fields { - if let Cow::Owned(otxt) = ctx.replace_text(txt) { - changed = true; - *txt = otxt; - } - } - } - Some(ord) => { - // single field - if let Some(txt) = note.fields.get_mut(ord) { - if let Cow::Owned(otxt) = ctx.replace_text(txt) { - changed = true; - *txt = otxt; - } + fn find_and_replace_inner(&mut self, ctx: FindReplaceContext) -> Result { + let mut last_ntid = None; + let mut field_ord = None; + self.transform_notes(&ctx.nids, |note, nt| { + if last_ntid != Some(nt.id) { + field_ord = ctx.field_name.as_ref().and_then(|n| nt.get_field_ord(n)); + last_ntid = Some(nt.id); + } + + let mut changed = false; + match field_ord { + None => { + // all fields + for txt in &mut note.fields { + if let Cow::Owned(otxt) = ctx.replace_text(txt) { + changed = true; + *txt = otxt; } } } - if changed { - self.update_note_inner_generating_cards( - &genctx, - &mut note, - true, - normalize_text, - )?; - total_changed += 1; + Some(ord) => { + // single field + if let Some(txt) = note.fields.get_mut(ord) { + if let Cow::Owned(otxt) = ctx.replace_text(txt) { + changed = true; + *txt = otxt; + } + } } } - } - Ok(total_changed) + Ok(TransformNoteOutput { + changed, + generate_cards: true, + mark_modified: true, + }) + }) } } diff --git a/rslib/src/notes.rs b/rslib/src/notes.rs index 084995f1c..540c20cf9 100644 --- a/rslib/src/notes.rs +++ b/rslib/src/notes.rs @@ -14,12 +14,20 @@ use crate::{ }; use itertools::Itertools; use num_integer::Integer; -use std::{collections::HashSet, convert::TryInto}; +use regex::{Regex, Replacer}; +use std::{borrow::Cow, collections::HashSet, convert::TryInto}; define_newtype!(NoteID, i64); // fixme: ensure nulls and x1f not in field contents +#[derive(Default)] +pub(crate) struct TransformNoteOutput { + pub changed: bool, + pub generate_cards: bool, + pub mark_modified: bool, +} + #[derive(Debug)] pub struct Note { pub id: NoteID, @@ -116,6 +124,17 @@ impl Note { }) .collect() } + + pub(crate) fn replace_tags(&mut self, re: &Regex, mut repl: T) -> bool { + let mut changed = false; + for tag in &mut self.tags { + if let Cow::Owned(rep) = re.replace_all(tag, repl.by_ref()) { + *tag = rep; + changed = true; + } + } + changed + } } impl From for pb::Note { @@ -174,13 +193,10 @@ fn anki_base91(mut n: u64) -> String { impl Collection { fn canonify_note_tags(&self, note: &mut Note, usn: Usn) -> Result<()> { - // fixme: avoid the excess split/join - note.tags = self - .canonify_tags(¬e.tags.join(" "), usn)? - .0 - .split(' ') - .map(Into::into) - .collect(); + if !note.tags.is_empty() { + let tags = std::mem::replace(&mut note.tags, vec![]); + note.tags = self.canonify_tags(tags, usn)?.0; + } Ok(()) } @@ -268,38 +284,69 @@ impl Collection { pub(crate) fn after_note_updates( &mut self, nids: &[NoteID], - usn: Usn, generate_cards: bool, mark_notes_modified: bool, ) -> Result<()> { + self.transform_notes(nids, |_note, _nt| { + Ok(TransformNoteOutput { + changed: true, + generate_cards, + mark_modified: mark_notes_modified, + }) + }) + .map(|_| ()) + } + + pub(crate) fn transform_notes( + &mut self, + nids: &[NoteID], + mut transformer: F, + ) -> Result + where + F: FnMut(&mut Note, &NoteType) -> Result, + { let nids_by_notetype = self.storage.note_ids_by_notetype(nids)?; let norm = self.normalize_note_text(); + let mut changed_notes = 0; + let usn = self.usn()?; + for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) { let nt = self .get_notetype(ntid)? .ok_or_else(|| AnkiError::invalid_input("missing note type"))?; - let genctx = CardGenContext::new(&nt, usn); + + let mut genctx = None; for (_, nid) in group { + // grab the note and transform it let mut note = self.storage.get_note(nid)?.unwrap(); - if generate_cards { + let out = transformer(&mut note, &nt)?; + if !out.changed { + continue; + } + + if out.generate_cards { + let ctx = genctx.get_or_insert_with(|| CardGenContext::new(&nt, usn)); self.update_note_inner_generating_cards( - &genctx, + &ctx, &mut note, - mark_notes_modified, + out.mark_modified, norm, )?; } else { self.update_note_inner_without_cards( &mut note, - &genctx.notetype, + &nt, usn, - mark_notes_modified, + out.mark_modified, norm, )?; } + + changed_notes += 1; } } - Ok(()) + + Ok(changed_notes) } } diff --git a/rslib/src/storage/sqlite.rs b/rslib/src/storage/sqlite.rs index 761ccc589..25e92fdd2 100644 --- a/rslib/src/storage/sqlite.rs +++ b/rslib/src/storage/sqlite.rs @@ -301,4 +301,13 @@ impl SqliteStorage { .execute(&[TimestampMillis::now()])?; Ok(()) } + + ////////////////////////////////////////// + + #[cfg(test)] + pub(crate) fn db_scalar(&self, sql: &str) -> Result { + self.db + .query_row(sql, NO_PARAMS, |r| r.get(0)) + .map_err(Into::into) + } } diff --git a/rslib/src/tags.rs b/rslib/src/tags.rs index 13dd6bddc..93b22e41a 100644 --- a/rslib/src/tags.rs +++ b/rslib/src/tags.rs @@ -1,9 +1,13 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -use crate::collection::Collection; -use crate::err::Result; -use crate::types::Usn; +use crate::{ + collection::Collection, + err::{AnkiError, Result}, + notes::{NoteID, TransformNoteOutput}, + {text::normalize_to_nfc, types::Usn}, +}; +use regex::{NoExpand, Regex, Replacer}; use std::{borrow::Cow, collections::HashSet}; use unicase::UniCase; @@ -21,30 +25,43 @@ pub(crate) fn join_tags(tags: &[String]) -> String { } impl Collection { - /// Given a space-separated list of tags, fix case, ordering and duplicates. + /// Given a list of tags, fix case, ordering and duplicates. /// Returns true if any new tags were added. - pub(crate) fn canonify_tags(&self, tags: &str, usn: Usn) -> Result<(String, bool)> { - let mut tagset = HashSet::new(); + pub(crate) fn canonify_tags(&self, tags: Vec, usn: Usn) -> Result<(Vec, bool)> { + let mut seen = HashSet::new(); let mut added = false; - for tag in split_tags(tags) { + let tags: Vec<_> = tags + .iter() + .flat_map(|t| split_tags(t)) + .map(|s| normalize_to_nfc(&s)) + .collect(); + + for tag in &tags { + if tag.trim().is_empty() { + continue; + } let tag = self.register_tag(tag, usn)?; if matches!(tag, Cow::Borrowed(_)) { added = true; } - tagset.insert(UniCase::new(tag)); + seen.insert(UniCase::new(tag)); } - if tagset.is_empty() { - return Ok(("".into(), added)); + // exit early if no non-empty tags + if seen.is_empty() { + return Ok((vec![], added)); } - let mut tags = tagset.into_iter().collect::>(); + // return the sorted, canonified tags + let mut tags = seen.into_iter().collect::>(); tags.sort_unstable(); + let tags: Vec<_> = tags + .into_iter() + .map(|s| s.into_inner().to_string()) + .collect(); - let tags: Vec<_> = tags.into_iter().map(|s| s.into_inner()).collect(); - - Ok((format!(" {} ", tags.join(" ")), added)) + Ok((tags, added)) } pub(crate) fn register_tag<'a>(&self, tag: &'a str, usn: Usn) -> Result> { @@ -69,4 +86,172 @@ impl Collection { } Ok(changed) } + + fn replace_tags_for_notes_inner( + &mut self, + nids: &[NoteID], + tags: &[Regex], + mut repl: R, + ) -> Result { + self.transact(None, |col| { + col.transform_notes(nids, |note, _nt| { + let mut changed = false; + for re in tags { + if note.replace_tags(re, repl.by_ref()) { + changed = true; + } + } + + Ok(TransformNoteOutput { + changed, + generate_cards: false, + mark_modified: true, + }) + }) + }) + } + + /// Apply the provided list of regular expressions to note tags, + /// saving any modified notes. + pub fn replace_tags_for_notes( + &mut self, + nids: &[NoteID], + tags: &str, + repl: &str, + regex: bool, + ) -> Result { + // generate regexps + let tags = split_tags(tags) + .map(|tag| { + let tag = if regex { + tag.into() + } else { + regex::escape(tag) + }; + Regex::new(&format!("(?i){}", tag)) + .map_err(|_| AnkiError::invalid_input("invalid regex")) + }) + .collect::>>()?; + + if !regex { + self.replace_tags_for_notes_inner(nids, &tags, NoExpand(repl)) + } else { + self.replace_tags_for_notes_inner(nids, &tags, repl) + } + } + + pub fn add_tags_for_notes(&mut self, nids: &[NoteID], tags: &str) -> Result { + let tags: Vec<_> = split_tags(tags).collect(); + let matcher = regex::RegexSet::new( + tags.iter() + .map(|s| regex::escape(s)) + .map(|s| format!("(?i){}", s)), + ) + .map_err(|_| AnkiError::invalid_input("invalid regex"))?; + + self.transact(None, |col| { + col.transform_notes(nids, |note, _nt| { + let mut need_to_add = true; + let mut match_count = 0; + for tag in ¬e.tags { + if matcher.is_match(tag) { + match_count += 1; + } + if match_count == tags.len() { + need_to_add = false; + break; + } + } + + if need_to_add { + note.tags.extend(tags.iter().map(|&s| s.to_string())) + } + + Ok(TransformNoteOutput { + changed: need_to_add, + generate_cards: false, + mark_modified: true, + }) + }) + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{collection::open_test_collection, decks::DeckID}; + + #[test] + fn tags() -> Result<()> { + let mut col = open_test_collection(); + let nt = col.get_notetype_by_name("Basic")?.unwrap(); + let mut note = nt.new_note(); + col.add_note(&mut note, DeckID(1))?; + + let tags: String = col.storage.db_scalar("select tags from notes")?; + assert_eq!(tags, ""); + + // first instance wins in case of duplicates + note.tags = vec!["foo".into(), "FOO".into()]; + col.update_note(&mut note)?; + assert_eq!(¬e.tags, &["foo"]); + let tags: String = col.storage.db_scalar("select tags from notes")?; + assert_eq!(tags, " foo "); + + // existing case is used if in DB + note.tags = vec!["FOO".into()]; + col.update_note(&mut note)?; + assert_eq!(¬e.tags, &["foo"]); + assert_eq!(tags, " foo "); + + // tags are normalized to nfc + note.tags = vec!["\u{fa47}".into()]; + col.update_note(&mut note)?; + assert_eq!(¬e.tags, &["\u{6f22}"]); + + // if code incorrectly adds a space to a tag, it gets split + note.tags = vec!["one two".into()]; + col.update_note(&mut note)?; + assert_eq!(¬e.tags, &["one", "two"]); + + Ok(()) + } + + #[test] + fn bulk() -> Result<()> { + let mut col = open_test_collection(); + let nt = col.get_notetype_by_name("Basic")?.unwrap(); + let mut note = nt.new_note(); + note.tags.push("test".into()); + col.add_note(&mut note, DeckID(1))?; + + col.replace_tags_for_notes(&[note.id], "foo test", "bar", false)?; + let note = col.storage.get_note(note.id)?.unwrap(); + assert_eq!(note.tags[0], "bar"); + + col.replace_tags_for_notes(&[note.id], "b.r", "baz", false)?; + let note = col.storage.get_note(note.id)?.unwrap(); + assert_eq!(note.tags[0], "bar"); + + col.replace_tags_for_notes(&[note.id], "b.r", "baz", true)?; + let note = col.storage.get_note(note.id)?.unwrap(); + assert_eq!(note.tags[0], "baz"); + + let cnt = col.add_tags_for_notes(&[note.id], "cee aye")?; + assert_eq!(cnt, 1); + let note = col.storage.get_note(note.id)?.unwrap(); + assert_eq!(¬e.tags, &["aye", "baz", "cee"]); + + // if all tags already on note, it doesn't get updated + let cnt = col.add_tags_for_notes(&[note.id], "cee aye")?; + assert_eq!(cnt, 0); + + // empty replacement deletes tag + col.replace_tags_for_notes(&[note.id], "b.* .*ye", "", true)?; + let note = col.storage.get_note(note.id)?.unwrap(); + assert_eq!(¬e.tags, &["cee"]); + + Ok(()) + } }