fix duplicate search not checking first field

Thanks to abdo for the initial solution. Closes #838
This commit is contained in:
Damien Elmes 2020-11-30 09:27:10 +10:00
parent f1fbb9d82f
commit 009906d0c2
3 changed files with 41 additions and 30 deletions

View file

@ -434,15 +434,19 @@ impl Collection {
Ok(DuplicateState::Empty) Ok(DuplicateState::Empty)
} else { } else {
let csum = field_checksum(&stripped); let csum = field_checksum(&stripped);
for field in let have_dupe = self
self.storage .storage
.note_fields_by_checksum(note.id, note.notetype_id, csum)? .note_fields_by_checksum(note.notetype_id, csum)?
{ .into_iter()
if strip_html_preserving_media_filenames(&field) == stripped { .any(|(nid, field)| {
return Ok(DuplicateState::Duplicate); nid != note.id && strip_html_preserving_media_filenames(&field) == stripped
} });
if have_dupe {
Ok(DuplicateState::Duplicate)
} else {
Ok(DuplicateState::Normal)
} }
Ok(DuplicateState::Normal)
} }
} else { } else {
Ok(DuplicateState::Empty) Ok(DuplicateState::Empty)

View file

@ -9,6 +9,7 @@ use crate::{
err::Result, err::Result,
notes::field_checksum, notes::field_checksum,
notetype::NoteTypeID, notetype::NoteTypeID,
storage::ids_to_string,
text::{ text::{
escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames, escape_sql, is_glob, matches_glob, normalize_to_nfc, strip_html_preserving_media_filenames,
to_custom_re, to_re, to_sql, to_text, without_combining, to_custom_re, to_re, to_sql, to_text, without_combining,
@ -122,7 +123,7 @@ impl SqlWriter<'_> {
self.write_single_field(&norm(field), &self.norm_note(text), *is_re)? self.write_single_field(&norm(field), &self.norm_note(text), *is_re)?
} }
SearchNode::Duplicates { note_type_id, text } => { SearchNode::Duplicates { note_type_id, text } => {
self.write_dupes(*note_type_id, &self.norm_note(text)) self.write_dupes(*note_type_id, &self.norm_note(text))?
} }
SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)), SearchNode::Regex(re) => self.write_regex(&self.norm_note(re)),
SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)), SearchNode::NoCombining(text) => self.write_no_combining(&self.norm_note(text)),
@ -422,16 +423,28 @@ impl SqlWriter<'_> {
Ok(()) Ok(())
} }
fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) { fn write_dupes(&mut self, ntid: NoteTypeID, text: &str) -> Result<()> {
let text_nohtml = strip_html_preserving_media_filenames(text); let text_nohtml = strip_html_preserving_media_filenames(text);
let csum = field_checksum(text_nohtml.as_ref()); let csum = field_checksum(text_nohtml.as_ref());
write!(
self.sql, let nids: Vec<_> = self
"(n.mid = {} and n.csum = {} and n.sfld = ?)", .col
ntid, csum .storage
) .note_fields_by_checksum(ntid, csum)?
.unwrap(); .into_iter()
self.args.push(text_nohtml.to_string()); .filter_map(|(nid, field)| {
if strip_html_preserving_media_filenames(&field) == text_nohtml {
Some(nid)
} else {
None
}
})
.collect();
self.sql += "n.id in ";
ids_to_string(&mut self.sql, &nids);
Ok(())
} }
fn write_added(&mut self, days: u32) -> Result<()> { fn write_added(&mut self, days: u32) -> Result<()> {
@ -644,13 +657,7 @@ mod test {
assert_eq!(s(ctx, "flag:0"), ("((c.flags & 7) == 0)".into(), vec![])); assert_eq!(s(ctx, "flag:0"), ("((c.flags & 7) == 0)".into(), vec![]));
// dupes // dupes
assert_eq!( assert_eq!(s(ctx, "dupe:123,test"), ("(n.id in ())".into(), vec![]));
s(ctx, "dupe:123,test"),
(
"((n.mid = 123 and n.csum = 2840236005 and n.sfld = ?))".into(),
vec!["test".into()]
)
);
// if registered, searches with canonical // if registered, searches with canonical
ctx.transact(None, |col| col.register_tag("One", Usn(-1))) ctx.transact(None, |col| col.register_tag("One", Usn(-1)))

View file

@ -135,17 +135,17 @@ impl super::SqliteStorage {
.map(|_| ()) .map(|_| ())
} }
/// Returns the first field of other notes with the same checksum. /// Returns [(nid, field 0)] of notes with the same checksum.
/// The field of the provided note ID is not returned. /// The caller should strip the fields and compare to see if they actually
/// match.
pub(crate) fn note_fields_by_checksum( pub(crate) fn note_fields_by_checksum(
&self, &self,
nid: NoteID,
ntid: NoteTypeID, ntid: NoteTypeID,
csum: u32, csum: u32,
) -> Result<Vec<String>> { ) -> Result<Vec<(NoteID, String)>> {
self.db self.db
.prepare("select field_at_index(flds, 0) from notes where csum=? and mid=? and id !=?")? .prepare("select id, field_at_index(flds, 0) from notes where csum=? and mid=?")?
.query_and_then(params![csum, ntid, nid], |r| r.get(0).map_err(Into::into))? .query_and_then(params![csum, ntid], |r| Ok((r.get(0)?, r.get(1)?)))?
.collect() .collect()
} }