From 8b557ec382c385507056f73ba6628a341e3a4136 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Tue, 5 May 2020 20:50:17 +1000 Subject: [PATCH] move find&replace to backend --- proto/backend.proto | 20 +++ pylib/anki/find.py | 72 ++------- pylib/anki/rsbackend.py | 27 ++++ rslib/src/backend/mod.rs | 38 +++++ rslib/src/findreplace.rs | 146 ++++++++++++++++++ rslib/src/lib.rs | 1 + rslib/src/notetype/mod.rs | 16 ++ .../notetype/field_names_for_notes.sql | 10 ++ rslib/src/storage/notetype/mod.rs | 26 ++++ 9 files changed, 293 insertions(+), 63 deletions(-) create mode 100644 rslib/src/findreplace.rs create mode 100644 rslib/src/storage/notetype/field_names_for_notes.sql diff --git a/proto/backend.proto b/proto/backend.proto index 8e7bc22e6..f48a7453e 100644 --- a/proto/backend.proto +++ b/proto/backend.proto @@ -90,6 +90,8 @@ message BackendInput { bool new_deck_legacy = 75; int64 remove_deck = 76; Empty deck_tree_legacy = 77; + FieldNamesForNotesIn field_names_for_notes = 78; + FindAndReplaceIn find_and_replace = 79; } } @@ -159,6 +161,8 @@ message BackendOutput { bytes new_deck_legacy = 75; Empty remove_deck = 76; bytes deck_tree_legacy = 77; + FieldNamesForNotesOut field_names_for_notes = 78; + uint32 find_and_replace = 79; BackendError error = 2047; } @@ -712,3 +716,19 @@ message AddOrUpdateDeckLegacyIn { bool preserve_usn_and_mtime = 2; } +message FieldNamesForNotesIn { + repeated int64 nids = 1; +} + +message FieldNamesForNotesOut { + repeated string fields = 1; +} + +message FindAndReplaceIn { + repeated int64 nids = 1; + string search = 2; + string replacement = 3; + bool regex = 4; + bool match_case = 5; + string field_name = 6; +} diff --git a/pylib/anki/find.py b/pylib/anki/find.py index 8ecc8211e..40ee63ed6 100644 --- a/pylib/anki/find.py +++ b/pylib/anki/find.py @@ -3,11 +3,10 @@ from __future__ import annotations -import re from typing import TYPE_CHECKING, Optional, Set from anki.hooks import * -from anki.utils import ids2str, intTime, joinFields, 
splitFields, stripHTMLMedia +from anki.utils import ids2str, splitFields, stripHTMLMedia if TYPE_CHECKING: from anki.collection import _Collection @@ -38,56 +37,16 @@ def findReplace( field: Optional[str] = None, fold: bool = True, ) -> int: - "Find and replace fields in a note." - mmap: Dict[str, Any] = {} - if field: - for m in col.models.all(): - for f in m["flds"]: - if f["name"].lower() == field.lower(): - mmap[str(m["id"])] = f["ord"] - if not mmap: - return 0 - # find and gather replacements - if not regex: - src = re.escape(src) - dst = dst.replace("\\", "\\\\") - if fold: - src = "(?i)" + src - compiled_re = re.compile(src) + "Find and replace fields in a note. Returns changed note count." + return col.backend.find_and_replace(nids, src, dst, regex, fold, field) - def repl(s: str): - return compiled_re.sub(dst, s) - d = [] - snids = ids2str(nids) - nids = [] - for nid, mid, flds in col.db.execute( - "select id, mid, flds from notes where id in " + snids - ): - origFlds = flds - # does it match? - sflds = splitFields(flds) - if field: - try: - ord = mmap[str(mid)] - sflds[ord] = repl(sflds[ord]) - except KeyError: - # note doesn't have that field - continue - else: - for c in range(len(sflds)): - sflds[c] = repl(sflds[c]) - flds = joinFields(sflds) - if flds != origFlds: - nids.append(nid) - d.append((flds, intTime(), col.usn(), nid)) - if not d: - return 0 - # replace - col.db.executemany("update notes set flds=?,mod=?,usn=? 
where id=?", d) - col.updateFieldCache(nids) - col.genCards(nids) - return len(d) +def fieldNamesForNotes(col: _Collection, nids: List[int]) -> List[str]: + return list(col.backend.field_names_for_note_ids(nids)) + + +# Find duplicates +########################################################################## def fieldNames(col, downcase=True) -> List: @@ -100,19 +59,6 @@ def fieldNames(col, downcase=True) -> List: return list(fields) -def fieldNamesForNotes(col, nids) -> List: - fields: Set[str] = set() - mids = col.db.list("select distinct mid from notes where id in %s" % ids2str(nids)) - for mid in mids: - model = col.models.get(mid) - for name in col.models.fieldNames(model): - if name not in fields: # slower w/o - fields.add(name) - return sorted(fields, key=lambda x: x.lower()) - - -# Find duplicates -########################################################################## # returns array of ("dupestr", [nids]) def findDupes( col: _Collection, fieldName: str, search: str = "" diff --git a/pylib/anki/rsbackend.py b/pylib/anki/rsbackend.py index 126fc4d01..df8999455 100644 --- a/pylib/anki/rsbackend.py +++ b/pylib/anki/rsbackend.py @@ -745,6 +745,33 @@ class RustBackend: ).deck_tree_legacy return orjson.loads(bytes)[5] + def field_names_for_note_ids(self, nids: List[int]) -> Sequence[str]: + return self._run_command( + pb.BackendInput(field_names_for_notes=pb.FieldNamesForNotesIn(nids=nids)) + ).field_names_for_notes.fields + + def find_and_replace( + self, + nids: List[int], + search: str, + repl: str, + re: bool, + nocase: bool, + field_name: Optional[str], + ) -> int: + return self._run_command( + pb.BackendInput( + find_and_replace=pb.FindAndReplaceIn( + nids=nids, + search=search, + replacement=repl, + regex=re, + match_case=not nocase, + field_name=field_name, + ) + ) + ).find_and_replace + def translate_string_in( key: TR, **kwargs: Union[str, int, float] diff --git a/rslib/src/backend/mod.rs b/rslib/src/backend/mod.rs index fc954ddcc..3daed136d 
100644 --- a/rslib/src/backend/mod.rs +++ b/rslib/src/backend/mod.rs @@ -13,6 +13,7 @@ use crate::{ deckconf::{DeckConf, DeckConfID}, decks::{Deck, DeckID, DeckSchema11}, err::{AnkiError, NetworkErrorKind, Result, SyncErrorKind}, + findreplace::FindReplaceContext, i18n::{tr_args, I18n, TR}, latex::{extract_latex, extract_latex_expanding_clozes, ExtractedLatex}, log, @@ -361,6 +362,10 @@ impl Backend { OValue::CheckDatabase(pb::Empty {}) } Value::DeckTreeLegacy(_) => OValue::DeckTreeLegacy(self.deck_tree_legacy()?), + Value::FieldNamesForNotes(input) => { + OValue::FieldNamesForNotes(self.field_names_for_notes(input)?) + } + Value::FindAndReplace(input) => OValue::FindAndReplace(self.find_and_replace(input)?), }) } @@ -1056,6 +1061,39 @@ impl Backend { serde_json::to_vec(&tree).map_err(Into::into) }) } + + fn field_names_for_notes( + &self, + input: pb::FieldNamesForNotesIn, + ) -> Result { + self.with_col(|col| { + let nids: Vec<_> = input.nids.into_iter().map(NoteID).collect(); + col.storage + .field_names_for_notes(&nids) + .map(|fields| pb::FieldNamesForNotesOut { fields }) + }) + } + + fn find_and_replace(&self, input: pb::FindAndReplaceIn) -> Result { + let mut search = if input.regex { + input.search + } else { + regex::escape(&input.search) + }; + if !input.match_case { + search = format!("(?i){}", search); + } + let nids = input.nids.into_iter().map(NoteID).collect(); + let field_name = if input.field_name.is_empty() { + None + } else { + Some(input.field_name) + }; + let repl = input.replacement; + self.with_col(|col| { + col.find_and_replace(FindReplaceContext::new(nids, &search, &repl, field_name)?) 
+ }) + } } fn translate_arg_to_fluent_val(arg: &pb::TranslateArgValue) -> FluentValue { diff --git a/rslib/src/findreplace.rs b/rslib/src/findreplace.rs new file mode 100644 index 000000000..cfa9cf50f --- /dev/null +++ b/rslib/src/findreplace.rs @@ -0,0 +1,146 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use crate::{ + collection::Collection, + err::{AnkiError, Result}, + notes::NoteID, + notetype::CardGenContext, + types::Usn, +}; +use itertools::Itertools; +use regex::Regex; +use std::borrow::Cow; + +pub struct FindReplaceContext { + nids: Vec, + search: Regex, + replacement: String, + field_name: Option, +} + +impl FindReplaceContext { + pub fn new( + nids: Vec, + search_re: &str, + repl: impl Into, + field_name: Option, + ) -> Result { + Ok(FindReplaceContext { + nids, + search: Regex::new(search_re).map_err(|_| AnkiError::invalid_input("invalid regex"))?, + replacement: repl.into(), + field_name, + }) + } + + fn replace_text<'a>(&self, text: &'a str) -> Cow<'a, str> { + self.search.replace_all(text, self.replacement.as_str()) + } +} + +impl Collection { + pub fn find_and_replace(&mut self, ctx: FindReplaceContext) -> Result { + self.transact(None, |col| col.find_and_replace_inner(ctx, col.usn()?)) + } + + fn find_and_replace_inner(&mut self, ctx: FindReplaceContext, usn: Usn) -> Result { + let mut total_changed = 0; + let nids_by_notetype = self.storage.note_ids_by_notetype(&ctx.nids)?; + for (ntid, group) in &nids_by_notetype.into_iter().group_by(|tup| tup.0) { + let nt = self + .get_notetype(ntid)? 
+ .ok_or_else(|| AnkiError::invalid_input("missing note type"))?; + let genctx = CardGenContext::new(&nt, usn); + let field_ord = ctx.field_name.as_ref().and_then(|n| nt.get_field_ord(n)); + for (_, nid) in group { + let mut note = self.storage.get_note(nid)?.unwrap(); + let mut changed = false; + match field_ord { + None => { + // all fields + for txt in &mut note.fields { + if let Cow::Owned(otxt) = ctx.replace_text(txt) { + changed = true; + *txt = otxt; + } + } + } + Some(ord) => { + // single field + if let Some(txt) = note.fields.get_mut(ord) { + if let Cow::Owned(otxt) = ctx.replace_text(txt) { + changed = true; + *txt = otxt; + } + } + } + } + if changed { + self.update_note_inner(&genctx, &mut note)?; + total_changed += 1; + } + } + } + + Ok(total_changed) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{collection::open_test_collection, decks::DeckID}; + + #[test] + fn findreplace() -> Result<()> { + let mut col = open_test_collection(); + + let nt = col.get_notetype_by_name("Basic")?.unwrap(); + let mut note = nt.new_note(); + note.fields[0] = "one aaa".into(); + note.fields[1] = "two aaa".into(); + col.add_note(&mut note, DeckID(1))?; + + let nt = col.get_notetype_by_name("Cloze")?.unwrap(); + let mut note2 = nt.new_note(); + note2.fields[0] = "three aaa".into(); + col.add_note(&mut note2, DeckID(1))?; + + let nids = col.search_notes_only("")?; + let cnt = col.find_and_replace(FindReplaceContext::new( + nids.clone(), + "(?i)AAA", + "BBB", + None, + )?)?; + assert_eq!(cnt, 2); + + let note = col.storage.get_note(note.id)?.unwrap(); + // but the update should be limited to the specified field when it was available + assert_eq!(&note.fields, &["one BBB", "two BBB"]); + + let note2 = col.storage.get_note(note2.id)?.unwrap(); + assert_eq!(&note2.fields, &["three BBB"]); + + assert_eq!( + col.storage.field_names_for_notes(&nids)?, + vec!["Back".to_string(), "Front".into(), "Text".into()] + ); + let cnt = col.find_and_replace(FindReplaceContext::new( 
+ nids.clone(), + "BBB", + "ccc", + Some("Front".into()), + )?)?; + // still 2, as the caller is expected to provide only note ids that have + // that field, and if we can't find the field we fall back on all fields + assert_eq!(cnt, 2); + + let note = col.storage.get_note(note.id)?.unwrap(); + // but the update should be limited to the specified field when it was available + assert_eq!(&note.fields, &["one ccc", "two BBB"]); + + Ok(()) + } +} diff --git a/rslib/src/lib.rs b/rslib/src/lib.rs index 0d4062afc..caa7ee274 100644 --- a/rslib/src/lib.rs +++ b/rslib/src/lib.rs @@ -18,6 +18,7 @@ pub mod dbcheck; pub mod deckconf; pub mod decks; pub mod err; +pub mod findreplace; pub mod i18n; pub mod latex; pub mod log; diff --git a/rslib/src/notetype/mod.rs b/rslib/src/notetype/mod.rs index 83a4bb905..afe1f2654 100644 --- a/rslib/src/notetype/mod.rs +++ b/rslib/src/notetype/mod.rs @@ -294,6 +294,22 @@ impl NoteType { fn fix_field_names(&mut self) { self.fields.iter_mut().for_each(NoteField::fix_name); } + + /// Find the field index of the provided field name. 
+ pub(crate) fn get_field_ord(&self, field_name: &str) -> Option { + let field_name = UniCase::new(field_name); + self.fields + .iter() + .enumerate() + .filter_map(|(idx, f)| { + if UniCase::new(&f.name) == field_name { + Some(idx) + } else { + None + } + }) + .next() + } } impl From for NoteTypeProto { diff --git a/rslib/src/storage/notetype/field_names_for_notes.sql b/rslib/src/storage/notetype/field_names_for_notes.sql new file mode 100644 index 000000000..a805044e3 --- /dev/null +++ b/rslib/src/storage/notetype/field_names_for_notes.sql @@ -0,0 +1,10 @@ +select + distinct name +from fields +where + ntid in ( + select + mid + from notes + where + id in \ No newline at end of file diff --git a/rslib/src/storage/notetype/mod.rs b/rslib/src/storage/notetype/mod.rs index 0d4effc3e..4211fe67f 100644 --- a/rslib/src/storage/notetype/mod.rs +++ b/rslib/src/storage/notetype/mod.rs @@ -131,6 +131,32 @@ impl SqliteStorage { Ok(()) } + /// A sorted list of all field names used by provided notes, for use with + /// the find&replace feature. + pub(crate) fn field_names_for_notes(&self, nids: &[NoteID]) -> Result> { + let mut sql = include_str!("field_names_for_notes.sql").to_string(); + sql.push(' '); + ids_to_string(&mut sql, nids); + sql += ") order by name"; + self.db + .prepare(&sql)? + .query_and_then(NO_PARAMS, |r| r.get(0).map_err(Into::into))? + .collect() + } + + pub(crate) fn note_ids_by_notetype( + &self, + nids: &[NoteID], + ) -> Result> { + let mut sql = String::from("select mid, id from notes where id in "); + ids_to_string(&mut sql, nids); + sql += " order by mid"; + self.db + .prepare(&sql)? + .query_and_then(NO_PARAMS, |r| Ok((r.get(0)?, r.get(1)?)))? + .collect() + } + pub(crate) fn update_notetype_templates( &self, ntid: NoteTypeID,