diff --git a/proto/backend.proto b/proto/backend.proto index 7a69e697e..353700275 100644 --- a/proto/backend.proto +++ b/proto/backend.proto @@ -136,6 +136,7 @@ service BackendService { rpc ClozeNumbersInNote (Note) returns (ClozeNumbersInNoteOut); rpc AfterNoteUpdates (AfterNoteUpdatesIn) returns (Empty); rpc FieldNamesForNotes (FieldNamesForNotesIn) returns (FieldNamesForNotesOut); + rpc NoteIsDuplicateOrEmpty (Note) returns (NoteIsDuplicateOrEmptyOut); // note types @@ -865,3 +866,12 @@ message GetDeckNamesIn { // if unset, implies skip_empty_default bool include_filtered = 2; } + +message NoteIsDuplicateOrEmptyOut { + enum State { + NORMAL = 0; + EMPTY = 1; + DUPLICATE = 2; + } + State state = 1; +} diff --git a/pylib/anki/notes.py b/pylib/anki/notes.py index 4a91b762e..8d550f198 100644 --- a/pylib/anki/notes.py +++ b/pylib/anki/notes.py @@ -9,7 +9,7 @@ import anki # pylint: disable=unused-import from anki import hooks from anki.models import NoteType from anki.rsbackend import BackendNote -from anki.utils import fieldChecksum, joinFields, splitFields, stripHTMLMedia +from anki.utils import joinFields class Note: @@ -140,18 +140,5 @@ class Note: ################################################## def dupeOrEmpty(self) -> int: - "1 if first is empty; 2 if first is a duplicate, False otherwise." - val = self.fields[0] - if not val.strip(): - return 1 - csum = fieldChecksum(val) - # find any matching csums and compare - for flds in self.col.db.list( - "select flds from notes where csum = ? and id != ? and mid = ?", - csum, - self.id or 0, - self.mid, - ): - if stripHTMLMedia(splitFields(flds)[0]) == stripHTMLMedia(self.fields[0]): - return 2 - return False + "1 if first is empty; 2 if first is a duplicate, 0 otherwise." 
+ return self.col.backend.note_is_duplicate_or_empty(self.to_backend_note()) diff --git a/pylib/anki/rsbackend.py b/pylib/anki/rsbackend.py index fe7cd2fa8..17b94a2fc 100644 --- a/pylib/anki/rsbackend.py +++ b/pylib/anki/rsbackend.py @@ -543,6 +543,13 @@ class RustBackend: output.ParseFromString(self._run_command(45, input)) return output.fields + def note_is_duplicate_or_empty( + self, input: pb.Note + ) -> pb.NoteIsDuplicateOrEmptyOut.State: + output = pb.NoteIsDuplicateOrEmptyOut() + output.ParseFromString(self._run_command(46, input)) + return output.state + def add_or_update_notetype( self, *, json: bytes, preserve_usn_and_mtime: bool ) -> int: @@ -550,43 +557,43 @@ class RustBackend: json=json, preserve_usn_and_mtime=preserve_usn_and_mtime ) output = pb.NoteTypeID() - output.ParseFromString(self._run_command(46, input)) + output.ParseFromString(self._run_command(47, input)) return output.ntid def get_stock_notetype_legacy(self, kind: pb.StockNoteType) -> bytes: input = pb.GetStockNotetypeIn(kind=kind) output = pb.Json() - output.ParseFromString(self._run_command(47, input)) + output.ParseFromString(self._run_command(48, input)) return output.json def get_notetype_legacy(self, ntid: int) -> bytes: input = pb.NoteTypeID(ntid=ntid) output = pb.Json() - output.ParseFromString(self._run_command(48, input)) + output.ParseFromString(self._run_command(49, input)) return output.json def get_notetype_names(self) -> Sequence[pb.NoteTypeNameID]: input = pb.Empty() output = pb.NoteTypeNames() - output.ParseFromString(self._run_command(49, input)) + output.ParseFromString(self._run_command(50, input)) return output.entries def get_notetype_names_and_counts(self) -> Sequence[pb.NoteTypeNameIDUseCount]: input = pb.Empty() output = pb.NoteTypeUseCounts() - output.ParseFromString(self._run_command(50, input)) + output.ParseFromString(self._run_command(51, input)) return output.entries def get_notetype_id_by_name(self, val: str) -> int: input = pb.String(val=val) output = 
pb.NoteTypeID() - output.ParseFromString(self._run_command(51, input)) + output.ParseFromString(self._run_command(52, input)) return output.ntid def remove_notetype(self, ntid: int) -> pb.Empty: input = pb.NoteTypeID(ntid=ntid) output = pb.Empty() - output.ParseFromString(self._run_command(52, input)) + output.ParseFromString(self._run_command(53, input)) return output def open_collection( @@ -604,42 +611,42 @@ class RustBackend: log_path=log_path, ) output = pb.Empty() - output.ParseFromString(self._run_command(53, input)) + output.ParseFromString(self._run_command(54, input)) return output def close_collection(self, downgrade_to_schema11: bool) -> pb.Empty: input = pb.CloseCollectionIn(downgrade_to_schema11=downgrade_to_schema11) output = pb.Empty() - output.ParseFromString(self._run_command(54, input)) + output.ParseFromString(self._run_command(55, input)) return output def check_database(self) -> Sequence[str]: input = pb.Empty() output = pb.CheckDatabaseOut() - output.ParseFromString(self._run_command(55, input)) + output.ParseFromString(self._run_command(56, input)) return output.problems def sync_media(self, *, hkey: str, endpoint: str) -> pb.Empty: input = pb.SyncMediaIn(hkey=hkey, endpoint=endpoint) output = pb.Empty() - output.ParseFromString(self._run_command(56, input)) + output.ParseFromString(self._run_command(57, input)) return output def abort_media_sync(self) -> pb.Empty: input = pb.Empty() output = pb.Empty() - output.ParseFromString(self._run_command(57, input)) + output.ParseFromString(self._run_command(58, input)) return output def before_upload(self) -> pb.Empty: input = pb.Empty() output = pb.Empty() - output.ParseFromString(self._run_command(58, input)) + output.ParseFromString(self._run_command(59, input)) return output def translate_string(self, input: pb.TranslateStringIn) -> str: output = pb.String() - output.ParseFromString(self._run_command(59, input)) + output.ParseFromString(self._run_command(60, input)) return output.val def 
format_timespan( @@ -647,7 +654,7 @@ class RustBackend: ) -> str: input = pb.FormatTimespanIn(seconds=seconds, context=context) output = pb.String() - output.ParseFromString(self._run_command(60, input)) + output.ParseFromString(self._run_command(61, input)) return output.val def register_tags( @@ -657,61 +664,61 @@ class RustBackend: tags=tags, preserve_usn=preserve_usn, usn=usn, clear_first=clear_first ) output = pb.Bool() - output.ParseFromString(self._run_command(61, input)) + output.ParseFromString(self._run_command(62, input)) return output.val def all_tags(self) -> Sequence[pb.TagUsnTuple]: input = pb.Empty() output = pb.AllTagsOut() - output.ParseFromString(self._run_command(62, input)) + output.ParseFromString(self._run_command(63, input)) return output.tags def get_changed_tags(self, val: int) -> Sequence[str]: input = pb.Int32(val=val) output = pb.GetChangedTagsOut() - output.ParseFromString(self._run_command(63, input)) + output.ParseFromString(self._run_command(64, input)) return output.tags def get_config_json(self, val: str) -> bytes: input = pb.String(val=val) output = pb.Json() - output.ParseFromString(self._run_command(64, input)) + output.ParseFromString(self._run_command(65, input)) return output.json def set_config_json(self, *, key: str, value_json: bytes) -> pb.Empty: input = pb.SetConfigJsonIn(key=key, value_json=value_json) output = pb.Empty() - output.ParseFromString(self._run_command(65, input)) + output.ParseFromString(self._run_command(66, input)) return output def remove_config(self, val: str) -> pb.Empty: input = pb.String(val=val) output = pb.Empty() - output.ParseFromString(self._run_command(66, input)) + output.ParseFromString(self._run_command(67, input)) return output def set_all_config(self, json: bytes) -> pb.Empty: input = pb.Json(json=json) output = pb.Empty() - output.ParseFromString(self._run_command(67, input)) + output.ParseFromString(self._run_command(68, input)) return output def get_all_config(self) -> bytes: input = 
pb.Empty() output = pb.Json() - output.ParseFromString(self._run_command(68, input)) + output.ParseFromString(self._run_command(69, input)) return output.json def get_preferences(self) -> pb.CollectionSchedulingSettings: input = pb.Empty() output = pb.Preferences() - output.ParseFromString(self._run_command(69, input)) + output.ParseFromString(self._run_command(70, input)) return output.sched def set_preferences(self, sched: pb.CollectionSchedulingSettings) -> pb.Empty: input = pb.Preferences(sched=sched) output = pb.Empty() - output.ParseFromString(self._run_command(70, input)) + output.ParseFromString(self._run_command(71, input)) return output # @@AUTOGEN@@ diff --git a/rslib/src/backend/mod.rs b/rslib/src/backend/mod.rs index 3d5276f07..5d233ae55 100644 --- a/rslib/src/backend/mod.rs +++ b/rslib/src/backend/mod.rs @@ -682,6 +682,17 @@ impl BackendService for Backend { }) } + fn note_is_duplicate_or_empty( + &mut self, + input: pb::Note, + ) -> BackendResult<pb::NoteIsDuplicateOrEmptyOut> { + let note: Note = input.into(); + self.with_col(|col| { + col.note_is_duplicate_or_empty(&note) + .map(|r| pb::NoteIsDuplicateOrEmptyOut { state: r as i32 }) + }) + } + // notetypes //------------------------------------------------------------------- diff --git a/rslib/src/notes.rs b/rslib/src/notes.rs index 9d04a8aef..f5ee41cf7 100644 --- a/rslib/src/notes.rs +++ b/rslib/src/notes.rs @@ -1,6 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +use crate::backend_proto::note_is_duplicate_or_empty_out::State as DuplicateState; use crate::{ backend_proto as pb, collection::Collection, @@ -382,6 +383,28 @@ impl Collection { Ok(changed_notes) } + + pub(crate) fn note_is_duplicate_or_empty(&self, note: &Note) -> Result<DuplicateState> { + if let Some(field1) = note.fields.get(0) { + let stripped = strip_html_preserving_image_filenames(field1); + if stripped.trim().is_empty() { + Ok(DuplicateState::Empty) + } else { + let csum = 
field_checksum(&stripped); + for field in self + .storage + .note_fields_by_checksum(note.id, note.ntid, csum)? + { + if strip_html_preserving_image_filenames(&field) == stripped { + return Ok(DuplicateState::Duplicate); + } + } + Ok(DuplicateState::Normal) + } + } else { + Ok(DuplicateState::Empty) + } + } } #[cfg(test)] diff --git a/rslib/src/storage/note/mod.rs b/rslib/src/storage/note/mod.rs index c9fc3d79c..73e4cbf65 100644 --- a/rslib/src/storage/note/mod.rs +++ b/rslib/src/storage/note/mod.rs @@ -4,6 +4,7 @@ use crate::{ err::Result, notes::{Note, NoteID}, + notetype::NoteTypeID, tags::{join_tags, split_tags}, timestamp::TimestampMillis, }; @@ -95,4 +96,18 @@ impl super::SqliteStorage { .execute(NO_PARAMS)?; Ok(()) } + + /// Returns the first field of other notes with the same checksum. + /// The field of the provided note ID is not returned. + pub(crate) fn note_fields_by_checksum( + &self, + nid: NoteID, + ntid: NoteTypeID, + csum: u32, + ) -> Result<Vec<String>> { + self.db + .prepare("select field_at_index(flds, 0) from notes where csum=? and mid=? and id !=?")? + .query_and_then(params![csum, ntid, nid], |r| r.get(0).map_err(Into::into))? + .collect() + } } diff --git a/rspy/src/lib.rs b/rspy/src/lib.rs index b7fd82ad2..05765edda 100644 --- a/rspy/src/lib.rs +++ b/rspy/src/lib.rs @@ -105,6 +105,7 @@ fn want_release_gil(method: u32) -> bool { BackendMethod::GetAllConfig => true, BackendMethod::GetPreferences => true, BackendMethod::SetPreferences => true, + BackendMethod::NoteIsDuplicateOrEmpty => true, } } else { false