move dupe check to backend

This commit is contained in:
Damien Elmes 2020-05-24 19:48:56 +10:00
parent d2dd619f18
commit c8d13209cd
7 changed files with 95 additions and 41 deletions

View file

@ -136,6 +136,7 @@ service BackendService {
rpc ClozeNumbersInNote (Note) returns (ClozeNumbersInNoteOut);
rpc AfterNoteUpdates (AfterNoteUpdatesIn) returns (Empty);
rpc FieldNamesForNotes (FieldNamesForNotesIn) returns (FieldNamesForNotesOut);
rpc NoteIsDuplicateOrEmpty (Note) returns (NoteIsDuplicateOrEmptyOut);
// note types
@ -865,3 +866,12 @@ message GetDeckNamesIn {
// if unset, implies skip_empty_default
bool include_filtered = 2;
}
// Reply for the NoteIsDuplicateOrEmpty RPC: classifies a note by the
// content of its first field.
message NoteIsDuplicateOrEmptyOut {
enum State {
// First field has content and no other note matched it.
NORMAL = 0;
// First field is missing, or blank once HTML has been stripped.
EMPTY = 1;
// Another note of the same note type has the same stripped first field.
DUPLICATE = 2;
}
// Outcome of the check.
State state = 1;
}

View file

@ -9,7 +9,7 @@ import anki # pylint: disable=unused-import
from anki import hooks
from anki.models import NoteType
from anki.rsbackend import BackendNote
from anki.utils import fieldChecksum, joinFields, splitFields, stripHTMLMedia
from anki.utils import joinFields
class Note:
@ -140,18 +140,5 @@ class Note:
##################################################
def dupeOrEmpty(self) -> int:
"1 if first is empty; 2 if first is a duplicate, False otherwise."
val = self.fields[0]
if not val.strip():
return 1
csum = fieldChecksum(val)
# find any matching csums and compare
for flds in self.col.db.list(
"select flds from notes where csum = ? and id != ? and mid = ?",
csum,
self.id or 0,
self.mid,
):
if stripHTMLMedia(splitFields(flds)[0]) == stripHTMLMedia(self.fields[0]):
return 2
return False
"1 if first is empty; 2 if first is a duplicate, 0 otherwise."
return self.col.backend.note_is_duplicate_or_empty(self.to_backend_note())

View file

@ -543,6 +543,13 @@ class RustBackend:
output.ParseFromString(self._run_command(45, input))
return output.fields
# Ask the backend whether the given note's first field is empty or
# duplicates another note; returns the State value from the reply.
def note_is_duplicate_or_empty(
self, input: pb.Note
) -> pb.NoteIsDuplicateOrEmptyOut.State:
output = pb.NoteIsDuplicateOrEmptyOut()
# 46 is the backend command index used for this RPC.
output.ParseFromString(self._run_command(46, input))
return output.state
def add_or_update_notetype(
self, *, json: bytes, preserve_usn_and_mtime: bool
) -> int:
@ -550,43 +557,43 @@ class RustBackend:
json=json, preserve_usn_and_mtime=preserve_usn_and_mtime
)
output = pb.NoteTypeID()
output.ParseFromString(self._run_command(46, input))
output.ParseFromString(self._run_command(47, input))
return output.ntid
def get_stock_notetype_legacy(self, kind: pb.StockNoteType) -> bytes:
input = pb.GetStockNotetypeIn(kind=kind)
output = pb.Json()
output.ParseFromString(self._run_command(47, input))
output.ParseFromString(self._run_command(48, input))
return output.json
def get_notetype_legacy(self, ntid: int) -> bytes:
input = pb.NoteTypeID(ntid=ntid)
output = pb.Json()
output.ParseFromString(self._run_command(48, input))
output.ParseFromString(self._run_command(49, input))
return output.json
def get_notetype_names(self) -> Sequence[pb.NoteTypeNameID]:
input = pb.Empty()
output = pb.NoteTypeNames()
output.ParseFromString(self._run_command(49, input))
output.ParseFromString(self._run_command(50, input))
return output.entries
def get_notetype_names_and_counts(self) -> Sequence[pb.NoteTypeNameIDUseCount]:
input = pb.Empty()
output = pb.NoteTypeUseCounts()
output.ParseFromString(self._run_command(50, input))
output.ParseFromString(self._run_command(51, input))
return output.entries
def get_notetype_id_by_name(self, val: str) -> int:
input = pb.String(val=val)
output = pb.NoteTypeID()
output.ParseFromString(self._run_command(51, input))
output.ParseFromString(self._run_command(52, input))
return output.ntid
def remove_notetype(self, ntid: int) -> pb.Empty:
input = pb.NoteTypeID(ntid=ntid)
output = pb.Empty()
output.ParseFromString(self._run_command(52, input))
output.ParseFromString(self._run_command(53, input))
return output
def open_collection(
@ -604,42 +611,42 @@ class RustBackend:
log_path=log_path,
)
output = pb.Empty()
output.ParseFromString(self._run_command(53, input))
output.ParseFromString(self._run_command(54, input))
return output
def close_collection(self, downgrade_to_schema11: bool) -> pb.Empty:
input = pb.CloseCollectionIn(downgrade_to_schema11=downgrade_to_schema11)
output = pb.Empty()
output.ParseFromString(self._run_command(54, input))
output.ParseFromString(self._run_command(55, input))
return output
def check_database(self) -> Sequence[str]:
input = pb.Empty()
output = pb.CheckDatabaseOut()
output.ParseFromString(self._run_command(55, input))
output.ParseFromString(self._run_command(56, input))
return output.problems
def sync_media(self, *, hkey: str, endpoint: str) -> pb.Empty:
input = pb.SyncMediaIn(hkey=hkey, endpoint=endpoint)
output = pb.Empty()
output.ParseFromString(self._run_command(56, input))
output.ParseFromString(self._run_command(57, input))
return output
def abort_media_sync(self) -> pb.Empty:
input = pb.Empty()
output = pb.Empty()
output.ParseFromString(self._run_command(57, input))
output.ParseFromString(self._run_command(58, input))
return output
def before_upload(self) -> pb.Empty:
input = pb.Empty()
output = pb.Empty()
output.ParseFromString(self._run_command(58, input))
output.ParseFromString(self._run_command(59, input))
return output
def translate_string(self, input: pb.TranslateStringIn) -> str:
output = pb.String()
output.ParseFromString(self._run_command(59, input))
output.ParseFromString(self._run_command(60, input))
return output.val
def format_timespan(
@ -647,7 +654,7 @@ class RustBackend:
) -> str:
input = pb.FormatTimespanIn(seconds=seconds, context=context)
output = pb.String()
output.ParseFromString(self._run_command(60, input))
output.ParseFromString(self._run_command(61, input))
return output.val
def register_tags(
@ -657,61 +664,61 @@ class RustBackend:
tags=tags, preserve_usn=preserve_usn, usn=usn, clear_first=clear_first
)
output = pb.Bool()
output.ParseFromString(self._run_command(61, input))
output.ParseFromString(self._run_command(62, input))
return output.val
def all_tags(self) -> Sequence[pb.TagUsnTuple]:
input = pb.Empty()
output = pb.AllTagsOut()
output.ParseFromString(self._run_command(62, input))
output.ParseFromString(self._run_command(63, input))
return output.tags
def get_changed_tags(self, val: int) -> Sequence[str]:
input = pb.Int32(val=val)
output = pb.GetChangedTagsOut()
output.ParseFromString(self._run_command(63, input))
output.ParseFromString(self._run_command(64, input))
return output.tags
def get_config_json(self, val: str) -> bytes:
input = pb.String(val=val)
output = pb.Json()
output.ParseFromString(self._run_command(64, input))
output.ParseFromString(self._run_command(65, input))
return output.json
def set_config_json(self, *, key: str, value_json: bytes) -> pb.Empty:
input = pb.SetConfigJsonIn(key=key, value_json=value_json)
output = pb.Empty()
output.ParseFromString(self._run_command(65, input))
output.ParseFromString(self._run_command(66, input))
return output
def remove_config(self, val: str) -> pb.Empty:
input = pb.String(val=val)
output = pb.Empty()
output.ParseFromString(self._run_command(66, input))
output.ParseFromString(self._run_command(67, input))
return output
def set_all_config(self, json: bytes) -> pb.Empty:
input = pb.Json(json=json)
output = pb.Empty()
output.ParseFromString(self._run_command(67, input))
output.ParseFromString(self._run_command(68, input))
return output
def get_all_config(self) -> bytes:
input = pb.Empty()
output = pb.Json()
output.ParseFromString(self._run_command(68, input))
output.ParseFromString(self._run_command(69, input))
return output.json
def get_preferences(self) -> pb.CollectionSchedulingSettings:
input = pb.Empty()
output = pb.Preferences()
output.ParseFromString(self._run_command(69, input))
output.ParseFromString(self._run_command(70, input))
return output.sched
def set_preferences(self, sched: pb.CollectionSchedulingSettings) -> pb.Empty:
input = pb.Preferences(sched=sched)
output = pb.Empty()
output.ParseFromString(self._run_command(70, input))
output.ParseFromString(self._run_command(71, input))
return output
# @@AUTOGEN@@

View file

@ -682,6 +682,17 @@ impl BackendService for Backend {
})
}
/// Service entry point for the duplicate/empty check.
///
/// Converts the protobuf note into the internal `Note`, runs the
/// collection-level check inside `with_col`, and wraps the resulting state
/// in the protobuf reply. Any error from the check is propagated unchanged.
fn note_is_duplicate_or_empty(
    &mut self,
    input: pb::Note,
) -> BackendResult<pb::NoteIsDuplicateOrEmptyOut> {
    let note: Note = input.into();
    self.with_col(|col| {
        let state = col.note_is_duplicate_or_empty(&note)?;
        // The protobuf message carries the enum as its i32 discriminant.
        Ok(pb::NoteIsDuplicateOrEmptyOut {
            state: state as i32,
        })
    })
}
// notetypes
//-------------------------------------------------------------------

View file

@ -1,6 +1,7 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use crate::backend_proto::note_is_duplicate_or_empty_out::State as DuplicateState;
use crate::{
backend_proto as pb,
collection::Collection,
@ -382,6 +383,28 @@ impl Collection {
Ok(changed_notes)
}
/// Classifies `note` by its first field.
///
/// Returns `Empty` when the note has no first field, or when that field is
/// blank after `strip_html_preserving_image_filenames` and trimming.
/// Returns `Duplicate` when storage holds another note with the same
/// checksum and note type whose stripped first field is identical.
/// Otherwise returns `Normal`. Storage errors are propagated.
pub(crate) fn note_is_duplicate_or_empty(&self, note: &Note) -> Result<DuplicateState> {
    // A note without any fields is treated the same as a blank first field.
    let first_field = match note.fields.get(0) {
        Some(text) => text,
        None => return Ok(DuplicateState::Empty),
    };

    let stripped = strip_html_preserving_image_filenames(first_field);
    if stripped.trim().is_empty() {
        return Ok(DuplicateState::Empty);
    }

    // The checksum only narrows the candidates; the stripped text is still
    // compared to rule out checksum collisions.
    let csum = field_checksum(&stripped);
    let candidates = self
        .storage
        .note_fields_by_checksum(note.id, note.ntid, csum)?;
    let is_duplicate = candidates
        .iter()
        .any(|other| strip_html_preserving_image_filenames(other) == stripped);

    Ok(if is_duplicate {
        DuplicateState::Duplicate
    } else {
        DuplicateState::Normal
    })
}
}
#[cfg(test)]

View file

@ -4,6 +4,7 @@
use crate::{
err::Result,
notes::{Note, NoteID},
notetype::NoteTypeID,
tags::{join_tags, split_tags},
timestamp::TimestampMillis,
};
@ -95,4 +96,18 @@ impl super::SqliteStorage {
.execute(NO_PARAMS)?;
Ok(())
}
/// Fetches the first field (`field_at_index(flds, 0)`) of every note whose
/// stored checksum is `csum` and whose note type is `ntid`.
///
/// The note identified by `nid` is excluded, so a note is never compared
/// against itself. Database errors are converted into the crate error type.
pub(crate) fn note_fields_by_checksum(
    &self,
    nid: NoteID,
    ntid: NoteTypeID,
    csum: u32,
) -> Result<Vec<String>> {
    let mut stmt = self
        .db
        .prepare("select field_at_index(flds, 0) from notes where csum=? and mid=? and id !=?")?;
    // Parameter order follows the placeholders: csum, mid, then id.
    let rows = stmt.query_and_then(params![csum, ntid, nid], |row| {
        row.get(0).map_err(Into::into)
    })?;
    rows.collect()
}
}

View file

@ -105,6 +105,7 @@ fn want_release_gil(method: u32) -> bool {
BackendMethod::GetAllConfig => true,
BackendMethod::GetPreferences => true,
BackendMethod::SetPreferences => true,
BackendMethod::NoteIsDuplicateOrEmpty => true,
}
} else {
false