Anki/anki/find.py
Damien Elmes 291bd399b7 field searching
dropped support for field:foo, as you can type 'foo:' instead to accomplish
the same thing
2011-04-28 09:24:01 +09:00

403 lines
15 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import re
from anki.utils import ids2str, splitFields
# Token kinds produced by Finder._parseQuery().
SEARCH_TAG = 0
SEARCH_TYPE = 1
SEARCH_PHRASE = 2
SEARCH_FID = 3
SEARCH_TEMPLATE = 4
SEARCH_FIELD = 5

# Find
##########################################################################

class Finder(object):
    """Build SQL card queries from a user search string.

    The search string is tokenised by _parseQuery(). Each token adds a
    condition on the facts table (lims['fact']) or on the cards table
    (lims['card']); findCardsWhere() joins the conditions with "and".
    """

    def __init__(self, deck):
        self.deck = deck

    def findCards(self, query):
        "Return the card ids matching QUERY, via deck.db.list()."
        self.query = query
        (q, args) = self.findCardsWhere()
        query = "select id from cards"
        if q:
            query += " where " + q
        return self.deck.db.list(query, **args)

    def _findLimits(self):
        "Generate a list of fact/card limits for the query."
        self.lims = {
            'fact': [],
            'card': [],
            'args': {},
            'valid': True
        }
        for c, (token, isNeg, kind) in enumerate(self._parseQuery()):
            if kind == SEARCH_TAG:
                self._findTag(token, isNeg, c)
            elif kind == SEARCH_TYPE:
                self._findCardState(token, isNeg)
            elif kind == SEARCH_FID:
                self._findFids(token)
            elif kind == SEARCH_TEMPLATE:
                self._findTemplate(token, isNeg)
            elif kind == SEARCH_FIELD:
                self._findField(token, isNeg)
            else:
                self._findText(token, isNeg, c)

    def _findTag(self, val, neg, c):
        "Add a fact limit for tag VAL; 'none' means facts without tags."
        if val == "none":
            # fact limits are bare conditions: they are spliced into
            # "select id from facts where ..." by findCardsWhere()
            if neg:
                self.lims['fact'].append("tags != ''")
            else:
                self.lims['fact'].append("tags = ''")
            return
        extra = "not" if neg else ""
        val = val.replace("*", "%")
        # pad with spaces so only whole tags match, unless the user
        # supplied a wildcard at that end
        if not val.startswith("%"):
            val = "% " + val
        if not val.endswith("%"):
            val += " %"
        self.lims['args']["_tag_%d" % c] = val
        self.lims['fact'].append(
            "tags %s like :_tag_%d" % (extra, c))

    def _findCardState(self, val, neg):
        "Add a card limit for an 'is:' state; unknown states match nothing."
        cond = None
        if val in ("rev", "new", "lrn"):
            if val == "rev":
                n = 2
            elif val == "new":
                n = 0
            else:
                n = 1
            cond = "type = %d" % n
        elif val == "suspended":
            cond = "queue = -1"
        elif val == "due":
            cond = "(queue = 2 and due <= %d)" % self.deck.sched.today
        if cond is None:
            # a mistyped state should not silently match every card
            self.lims['valid'] = False
            return
        if neg:
            cond = "not (%s)" % cond
        self.lims['card'].append(cond)

    def _findText(self, val, neg, c):
        "Add a fact limit matching VAL anywhere in the fact's fields."
        val = val.replace("*", "%")
        extra = "not" if neg else ""
        self.lims['args']["_text_%d" % c] = "%" + val + "%"
        self.lims['fact'].append("flds %s like :_text_%d escape '\\'" % (
            extra, c))

    def _findFids(self, val):
        "Add a fact limit for the comma separated fact ids in VAL."
        # VAL has already been validated as integers by _parseQuery()
        self.lims['fact'].append("id in (%s)" % val)

    def _findTemplate(self, val, isNeg):
        "Add a card limit for a template given by 1-based number or by name."
        comp = "!=" if isNeg else "="
        try:
            num = int(val) - 1
        except ValueError:
            num = None
        wanted = val.lower()
        clauses = []
        for m in self.deck.models().values():
            for t in m.templates:
                if num is not None and t['ord'] == num:
                    # ordinal number
                    clause = "ord %s %d" % (comp, num)
                elif t['name'].lower() == wanted:
                    # template name
                    clause = (
                        "(fid in (select id from facts where mid = %d) "
                        "and ord %s %d)") % (m.id, comp, t['ord'])
                else:
                    continue
                if clause not in clauses:
                    clauses.append(clause)
        if not clauses:
            # only ever clear the flag, so an earlier failure is kept
            self.lims['valid'] = False
            return
        if len(clauses) == 1:
            self.lims['card'].append(clauses[0])
        else:
            # a card belongs to a single model, so positive matches from
            # different models must be alternatives; negated clauses keep
            # being combined with "and" as before
            joiner = " and " if isNeg else " or "
            self.lims['card'].append("(%s)" % joiner.join(clauses))

    def _findField(self, token, isNeg):
        "Add a fact limit for a 'fieldname:value' token."
        parts = token.split(':', 1)
        field = parts[0].lower()
        value = "%" + parts[1].replace("*", "%") + "%"
        # find models that have that field
        mods = {}
        for m in self.deck.models().values():
            for f in m.fields:
                if f['name'].lower() == field:
                    mods[m.id] = (m, f['ord'])
        if not mods:
            # nothing has that field
            self.lims['valid'] = False
            return
        # gather fids. the like clause only narrows the candidates, as it
        # looks at all fields at once; the regex then checks the one field.
        # literal text is escaped so that only the wildcards are special
        regex = re.compile(
            ".*".join(re.escape(piece) for piece in value.split("%")))
        fids = []
        for (fid, mid, flds) in self.deck.db.execute("""
select id, mid, flds from facts
where mid in %s and flds like ? escape '\\'""" % (
                ids2str(mods.keys())),
                value):
            flds = splitFields(flds)
            idx = mods[mid][1]
            if regex.search(flds[idx]):
                fids.append(fid)
        extra = "not" if isNeg else ""
        self.lims['fact'].append("id %s in %s" % (extra, ids2str(fids)))

    def findCardsWhere(self):
        "Return (where clause, args) for self.query; the clause is '0' if invalid."
        self._findLimits()
        if not self.lims['valid']:
            return "0", {}
        x = []
        if self.lims['fact']:
            x.append("fid in (select id from facts where %s)" % " and ".join(
                self.lims['fact']))
        if self.lims['card']:
            x.extend(self.lims['card'])
        q = " and ".join(x)
        return q, self.lims['args']

    def _fieldNames(self):
        "Return the lower-cased field names of all models, without duplicates."
        fields = set()
        for m in self.deck.models().values():
            fields.update([f['name'].lower() for f in m.fields])
        return list(fields)

    # Most of this function was written by Marcus
    def _parseQuery(self):
        "Split self.query into a list of (value, isNeg, SEARCH_* kind) tuples."
        tokens = []
        res = []
        allowedfields = set(self._fieldNames())
        def addSearchFieldToken(field, value, isNeg):
            if field.lower() in allowedfields:
                res.append((field + ':' + value, isNeg, SEARCH_FIELD))
            else:
                # not a known field: fall back to plain phrases
                for p in phraselog:
                    res.append((p['value'], p['is_neg'], p['type']))
        # break query into words or phraselog
        # an extra space is added so the loop never ends in the middle
        # completing a token
        for match in re.findall(
            r'(-)?\'(([^\'\\]|\\.)*)\'|(-)?"(([^"\\]|\\.)*)"|(-)?([^ ]+)|([ ]+)',
            self.query + ' '):
            value = (match[1] or match[4] or match[7])
            isNeg = (match[0] == '-' or match[3] == '-' or match[6] == '-')
            tokens.append({'value': value, 'is_neg': isNeg})
        intoken = isNeg = False
        kind = None # kind of the command currently being parsed
        field = '' #name of the field for field related commands
        phraselog = [] #log of phrases in case potential command is not a command
        for c, token in enumerate(tokens):
            doprocess = True # only look for commands when this is true
            #prevent cases such as "field" : value as being processed as a command
            if len(token['value']) == 0:
                if intoken is True and kind == SEARCH_FIELD and field:
                    #case: fieldname: any thing here check for existence of fieldname
                    addSearchFieldToken(field, '*', isNeg)
                    phraselog = [] # reset phrases since command is completed
                intoken = doprocess = False
            if intoken is True:
                if kind == SEARCH_FIELD and field:
                    #case: fieldname:"value"
                    addSearchFieldToken(field, token['value'], isNeg)
                    intoken = doprocess = False
                elif kind == SEARCH_FIELD and not field:
                    #case: "fieldname":"name" or "field" anything
                    if token['value'].startswith(":") and len(phraselog) == 1:
                        #we now know a colon is next, so mark it as field
                        # and keep looking for the value
                        field = phraselog[0]['value']
                        parts = token['value'].split(':', 1)
                        phraselog.append(
                            {'value': token['value'], 'is_neg': False,
                             'type': SEARCH_PHRASE})
                        if parts[1]:
                            #value is included with the :, so wrap it up
                            addSearchFieldToken(field, parts[1], isNeg)
                            intoken = doprocess = False
                        doprocess = False
                    else:
                        #case: "fieldname"string/"fieldname"tag:name
                        intoken = False
                if intoken is False and doprocess is False:
                    #command has been fully processed
                    phraselog = [] # reset phraselog, since we used it for a command
            if intoken is False:
                #include any non-command related phrases in the query
                for p in phraselog:
                    res.append((p['value'], p['is_neg'], p['type']))
                phraselog = []
            if intoken is False and doprocess is True:
                field = ''
                isNeg = token['is_neg']
                if token['value'].startswith("tag:"):
                    token['value'] = token['value'][4:]
                    kind = SEARCH_TAG
                elif token['value'].startswith("is:"):
                    token['value'] = token['value'][3:].lower()
                    kind = SEARCH_TYPE
                elif token['value'].startswith("fid:") and len(token['value']) > 4:
                    dec = token['value'][4:]
                    try:
                        # accept a single id or a comma separated list
                        for d in dec.split(","):
                            int(d)
                        token['value'] = dec
                    except ValueError:
                        # id 0 never matches, so bad input finds nothing
                        token['value'] = "0"
                    kind = SEARCH_FID
                elif token['value'].startswith("card:"):
                    token['value'] = token['value'][5:]
                    kind = SEARCH_TEMPLATE
                else:
                    kind = SEARCH_FIELD
                    intoken = True
                    parts = token['value'].split(':', 1)
                    phraselog.append(
                        {'value': token['value'], 'is_neg': isNeg,
                         'type': SEARCH_PHRASE})
                    if len(parts) == 2 and parts[0]:
                        field = parts[0]
                        if parts[1]:
                            #simple fieldname:value case -
                            #no need to look for more data
                            addSearchFieldToken(field, parts[1], isNeg)
                            intoken = doprocess = False
                    if intoken is False:
                        phraselog = []
                if intoken is False and doprocess is True:
                    res.append((token['value'], isNeg, kind))
        return res
# Find and replace
##########################################################################
def findReplace(deck, fids, src, dst, isRe=False, field=None):
    """Find and replace fields in a fact.

    Replaces SRC with DST in the fdata rows belonging to the facts in FIDS.
    SRC is treated as a regular expression when ISRE is true. When FIELD is
    given, only rows whose fmid equals it are considered. Returns the number
    of distinct facts that were changed.
    """
    pattern = re.compile(src) if isRe else None
    # find
    sql = "select id, fid, value from fdata where fid in %s"
    if pattern is None:
        sql += " and value like :v"
    if field:
        sql += " and fmid = :fmid"
    sql = sql % ids2str(fids)
    like = "%" + src.replace("%", "%%") + "%"
    rows = deck.db.all(sql, v=like, fmid=field)
    # the like clause is only a rough filter, so each row is checked again
    modded = []
    for (rowId, factId, text) in rows:
        if pattern is not None:
            if not pattern.search(text):
                continue
            changed = pattern.sub(dst, text)
        else:
            if src not in text:
                continue
            changed = text.replace(src, dst)
        modded.append({'id': rowId, 'fid': factId, 'val': changed})
    if not modded:
        return 0
    # update
    touched = [row['fid'] for row in modded]
    deck.db.executemany(
        'update fdata set value = :val where id = :id', modded)
    deck.updateCardQACacheFromIds(touched, type="facts")
    if field:
        deck.updateFieldChecksums(field)
    else:
        deck.updateAllFieldChecksums()
    return len(set(touched))
# Find duplicates
##########################################################################
def findDuplicates(deck, fmids):
    """Return (value, [fid, ...]) pairs for values stored by several rows.

    Only fdata rows whose fmid is in FMIDS are examined, and values that are
    empty or only whitespace are skipped.
    """
    rows = deck.db.all(
        "select fid, value from fdata where fmid in %s" %
        ids2str(fmids))
    owners = {}
    for (fid, value) in rows:
        if value.strip():
            owners.setdefault(value, []).append(fid)
    return [(value, fids) for (value, fids) in owners.items()
            if len(fids) > 1]
# Find & sort
##########################################################################
# copied from ankiqt and trivially changed; will not work at the moment
# if idx == 0:
# self.sortKey = "question"
# elif idx == 1:
# self.sortKey = "answer"
# elif idx == 2:
# self.sortKey = "created"
# elif idx == 3:
# self.sortKey = "modified"
# elif idx == 4:
# self.sortKey = "combinedDue"
# elif idx == 5:
# self.sortKey = "interval"
# elif idx == 6:
# self.sortKey = "reps"
# elif idx == 7:
# self.sortKey = "factor"
# elif idx == 8:
# self.sortKey = "fact"
# elif idx == 9:
# self.sortKey = "noCount"
# elif idx == 10:
# self.sortKey = "firstAnswered"
# else:
# self.sortKey = ("field", self.sortFields[idx-11])
def findSorted(deck, query, sortKey):
    """Return the rows of deck.db.all() for the cards matching QUERY, sorted.

    SORTKEY is either the name of a cards column (or "fact"), or a sequence
    whose second item is a field name, in which case cards are ordered by
    that field's value. An empty QUERY selects all cards.

    NOTE(review): the SQL below refers to the fdata and fields tables and to
    columns such as cards.ordinal and facts.created; confirm they exist in
    the current schema before relying on this function.
    """
    # sorting
    if not query.strip():
        ads = ""
    else:
        ids = deck.findCards(query)
        ads = "cards.id in %s" % ids2str(ids)
    if not isinstance(sortKey, (tuple, list)):
        # card property
        if sortKey == "fact":
            sort = "order by facts.created, cards.created"
        else:
            sort = "order by cards." + sortKey
        if sortKey in ("question", "answer"):
            sort += " collate nocase"
        if sortKey == "fact":
            query = """
select cards.id from cards, facts
where cards.fid = facts.id """
            if ads:
                query += "and " + ads + " "
        else:
            query = "select id from cards "
            if ads:
                query += "where %s " % ads
        query += sort
    else:
        # field value
        ret = deck.db.all(
            "select id, numeric from fields where name = :name",
            name=sortKey[1])
        if not ret:
            # no such field: "fmid in ()" could not match any row
            return []
        fields = ",".join([str(x[0]) for x in ret])
        # if multiple models have the same field, use the first numeric bool
        numeric = ret[0][1]
        if numeric:
            order = "cast(fdata.value as real)"
        else:
            order = "fdata.value collate nocase"
        if ads:
            ads = " and " + ads
        query = ("select cards.id "
                 "from fdata, cards where fdata.fmid in (%s) "
                 "and fdata.fid = cards.fid" + ads +
                 " order by cards.ordinal, %s") % (fields, order)
    # run the query
    return deck.db.all(query)