From c27cc17762cc22532a649b610f94bba2ec1d4ea6 Mon Sep 17 00:00:00 2001
From: Damien Elmes
Date: Sun, 27 May 2012 00:12:26 +0900
Subject: [PATCH] findDupes() and findNotes()

---
 anki/collection.py |  7 +++++--
 anki/find.py       | 51 +++++++++++++++++++++++++++++++++++++---------
 tests/test_find.py | 31 ++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/anki/collection.py b/anki/collection.py
index 7c09110ab..201980ed4 100644
--- a/anki/collection.py
+++ b/anki/collection.py
@@ -519,11 +519,14 @@ where c.nid == f.id
     def findCards(self, query, order=False):
         return anki.find.Finder(self).findCards(query, order)
 
+    def findNotes(self, query):
+        return anki.find.Finder(self).findNotes(query)
+
     def findReplace(self, nids, src, dst, regex=None, field=None, fold=True):
         return anki.find.findReplace(self, nids, src, dst, regex, field, fold)
 
-    def findDuplicates(self, fmids):
-        return anki.find.findDuplicates(self, fmids)
+    def findDupes(self, fieldName, search=""):
+        return anki.find.findDupes(self, fieldName, search)
 
     # Stats
     ##########################################################################
diff --git a/anki/find.py b/anki/find.py
index 89c33a5c2..c4c01dd57 100644
--- a/anki/find.py
+++ b/anki/find.py
@@ -31,6 +31,24 @@ class Finder(object):
             res.reverse()
         return res
 
+    def findNotes(self, query):
+        tokens = self._tokenize(query)
+        preds, args = self._where(tokens)
+        if preds is None:
+            return []
+        if preds:
+            preds = "(" + preds + ")"
+        else:
+            preds = "1"
+        sql = """
+select distinct(n.id) from cards c, notes n where c.nid=n.id and """+preds
+        try:
+            res = self.col.db.list(sql, *args)
+        except:
+            # invalid grouping
+            return []
+        return res
+
     # Tokenizing
     ######################################################################
 
@@ -472,16 +490,29 @@ def fieldNames(col, downcase=True):
 # Find duplicates
 ##########################################################################
 
-def findDuplicates(col, fmids):
-    data = col.db.all(
-        "select nid, value from fdata where fmid in %s" %
-        ids2str(fmids))
+def findDupes(col, fieldName, search=""):
+    # limit search to notes with applicable field name
+    search += " '%s:*'" % fieldName
+    # go through notes
     vals = {}
-    for (nid, val) in data:
-        if not val.strip():
+    dupes = []
+    fields = {}
+    def ordForMid(mid):
+        if mid not in fields:
+            model = col.models.get(mid)
+            fields[mid] = col.models.fieldMap(model)[fieldName][0]
+        return fields[mid]
+    for nid, mid, flds in col.db.all(
+        "select id, mid, flds from notes where id in "+ids2str(
+            col.findNotes(search))):
+        flds = splitFields(flds)
+        val = flds[ordForMid(mid)]
+        # empty does not count as duplicate
+        if not val:
             continue
         if val not in vals:
-            vals[val] = [nid]
-        else:
-            vals[val].append(nid)
-    return [(k,v) for (k,v) in vals.items() if len(v) > 1]
+            vals[val] = []
+        vals[val].append(nid)
+        if len(vals[val]) == 2:
+            dupes.append((val, vals[val]))
+    return dupes
diff --git a/tests/test_find.py b/tests/test_find.py
index db7f9eff5..b82737743 100644
--- a/tests/test_find.py
+++ b/tests/test_find.py
@@ -240,3 +240,34 @@ def test_findReplace():
     f.load(); assert f['Back'] != "reg"
     assert deck.findReplace(nids, "B.r", "reg", regex=True) == 1
     f.load(); assert f['Back'] == "reg"
+
+def test_findDupes():
+    deck = getEmptyDeck()
+    f = deck.newNote()
+    f['Front'] = u'foo'
+    f['Back'] = u'bar'
+    deck.addNote(f)
+    f2 = deck.newNote()
+    f2['Front'] = u'baz'
+    f2['Back'] = u'bar'
+    deck.addNote(f2)
+    f3 = deck.newNote()
+    f3['Front'] = u'quux'
+    f3['Back'] = u'bar'
+    deck.addNote(f3)
+    f4 = deck.newNote()
+    f4['Front'] = u'quuux'
+    f4['Back'] = u'nope'
+    deck.addNote(f4)
+    r = deck.findDupes("Back")
+    assert r[0][0] == "bar"
+    assert len(r[0][1]) == 3
+    # valid search
+    r = deck.findDupes("Back", "bar")
+    assert r[0][0] == "bar"
+    assert len(r[0][1]) == 3
+    # excludes everything
+    r = deck.findDupes("Back", "invalid")
+    assert not r
+    # front isn't dupe
+    assert deck.findDupes("Front") == []