From 009906d0c24af4c722d0f150aac8db1130a4d5a5 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Mon, 30 Nov 2020 09:27:10 +1000 Subject: [PATCH] fix duplicate search not checking first field Thanks to abdo for the initial solution. Closes #838 --- rslib/src/notes.rs | 20 +++++++++++------- rslib/src/search/sqlwriter.rs | 39 +++++++++++++++++++++-------------- rslib/src/storage/note/mod.rs | 12 +++++------ 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/rslib/src/notes.rs b/rslib/src/notes.rs index e20bb5eb6..2abbf9f3f 100644 --- a/rslib/src/notes.rs +++ b/rslib/src/notes.rs @@ -434,15 +434,19 @@ impl Collection { Ok(DuplicateState::Empty) } else { let csum = field_checksum(&stripped); - for field in - self.storage - .note_fields_by_checksum(note.id, note.notetype_id, csum)? - { - if strip_html_preserving_media_filenames(&field) == stripped { - return Ok(DuplicateState::Duplicate); - } + let have_dupe = self + .storage + .note_fields_by_checksum(note.notetype_id, csum)? + .into_iter() + .any(|(nid, field)| { + nid != note.id && strip_html_preserving_media_filenames(&field) == stripped + }); + + if have_dupe { + Ok(DuplicateState::Duplicate) + } else { + Ok(DuplicateState::Normal) } - Ok(DuplicateState::Normal) } } else { Ok(DuplicateState::Empty) diff --git a/rslib/src/search/sqlwriter.rs b/rslib/src/search/sqlwriter.rs index 47d43d0eb..f02146a38 100644 --- a/rslib/src/search/sqlwriter.rs +++ b/rslib/src/search/sqlwriter.rs @@ -9,6 +9,7 @@ use crate::{ err::Result, notes::field_checksum, notetype::NoteTypeID, + storage::ids_to_string, text::{ escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames, to_custom_re, to_re, to_sql, to_text, without_combining, @@ -122,7 +123,7 @@ impl SqlWriter<'_> { self.write_single_field(&norm(field), &self.norm_note(text), *is_re)? } SearchNode::Duplicates { note_type_id, text } => { - self.write_dupes(*note_type_id, &self.norm_note(text)) + self.write_dupes(*note_type_id, &self.norm_note(text))? } SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)), SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)), @@ -422,16 +423,28 @@ impl SqlWriter<'_> { Ok(()) } - fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) { + fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) -> Result<()> { let text_nohtml = strip_html_preserving_media_filenames(text); let csum = field_checksum(text_nohtml.as_ref()); - write!( - self.sql, - "(n.mid = {} and n.csum = {} and n.sfld = ?)", - ntid, csum - ) - .unwrap(); - self.args.push(text_nohtml.to_string()); + + let nids: Vec<_> = self + .col + .storage + .note_fields_by_checksum(ntid, csum)? + .into_iter() + .filter_map(|(nid, field)| { + if strip_html_preserving_media_filenames(&field) == text_nohtml { + Some(nid) + } else { + None + } + }) + .collect(); + + self.sql += "n.id in "; + ids_to_string(&mut self.sql, &nids); + + Ok(()) } fn write_added(&mut self, days: u32) -> Result<()> { @@ -644,13 +657,7 @@ mod test { assert_eq!(s(ctx, "flag:0"), ("((c.flags & 7) == 0)".into(), vec![])); // dupes - assert_eq!( - s(ctx, "dupe:123,test"), - ( - "((n.mid = 123 and n.csum = 2840236005 and n.sfld = ?))".into(), - vec!["test".into()] - ) - ); + assert_eq!(s(ctx, "dupe:123,test"), ("(n.id in ())".into(), vec![])); // if registered, searches with canonical ctx.transact(None, |col| col.register_tag("One", Usn(-1))) diff --git a/rslib/src/storage/note/mod.rs b/rslib/src/storage/note/mod.rs index 46b875156..eb921b1c6 100644 --- a/rslib/src/storage/note/mod.rs +++ b/rslib/src/storage/note/mod.rs @@ -135,17 +135,17 @@ impl super::SqliteStorage { .map(|_| ()) } - /// Returns the first field of other notes with the same checksum. - /// The field of the provided note ID is not returned. + /// Returns [(nid, field 0)] of notes with the same checksum. + /// The caller should strip the fields and compare to see if they actually + /// match. pub(crate) fn note_fields_by_checksum( &self, - nid: NoteID, ntid: NoteTypeID, csum: u32, - ) -> Result> { + ) -> Result> { self.db - .prepare("select field_at_index(flds, 0) from notes where csum=? and mid=? and id !=?")? - .query_and_then(params![csum, ntid, nid], |r| r.get(0).map_err(Into::into))? + .prepare("select id, field_at_index(flds, 0) from notes where csum=? and mid=?")? + .query_and_then(params![csum, ntid], |r| Ok((r.get(0)?, r.get(1)?)))? .collect() }