Anki/anki/find.py
2012-04-25 13:24:54 +09:00

458 lines
17 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import re
from anki.utils import ids2str, splitFields, joinFields, stripHTML, intTime
from anki.consts import *
SEARCH_TAG = 0
SEARCH_TYPE = 1
SEARCH_PHRASE = 2
SEARCH_NID = 3
SEARCH_TEMPLATE = 4
SEARCH_FIELD = 5
SEARCH_MODEL = 6
SEARCH_DECK = 7
# Tools
##########################################################################
def fieldNames(col, downcase=True):
fields = set()
names = []
for m in col.models.all():
for f in m['flds']:
if f['name'].lower() not in fields:
names.append(f['name'])
fields.add(f['name'].lower())
if downcase:
return list(fields)
return names
# Find
##########################################################################
class Finder(object):
def __init__(self, col):
self.col = col
def findCards(self, query, full=False, order=None):
"Return a list of card ids for QUERY."
self.order = order
self.query = query
self.full = full
self._findLimits()
if not self.lims['valid']:
return []
(q, args) = self._whereClause()
order = self._order()
query = """\
select c.id from cards c, notes n where %s
and c.nid=n.id %s""" % (q, order)
res = self.col.db.list(query, **args)
if not self.order and self.col.conf['sortBackwards']:
res.reverse()
return res
def _whereClause(self):
q = " and ".join(self.lims['preds'])
if not q:
q = "1"
return q, self.lims['args']
def _order(self):
# user provided override?
if self.order:
return self.order
type = self.col.conf['sortType']
if not type:
return
if type.startswith("note"):
if type == "noteCrt":
sort = "n.id, c.ord"
elif type == "noteMod":
sort = "n.mod, c.ord"
elif type == "noteFld":
sort = "n.sfld collate nocase, c.ord"
else:
raise Exception()
elif type.startswith("card"):
if type == "cardMod":
sort = "c.mod"
elif type == "cardReps":
sort = "c.reps"
elif type == "cardDue":
sort = "c.due"
elif type == "cardEase":
sort = "c.factor"
elif type == "cardLapses":
sort = "c.lapses"
elif type == "cardIvl":
sort = "c.ivl"
else:
raise Exception()
else:
raise Exception()
return " order by " + sort
def _findLimits(self):
"Generate a list of note/card limits for the query."
self.lims = {
'preds': [],
'args': {},
'valid': True,
}
for c, (token, isNeg, type) in enumerate(self._parseQuery()):
if type == SEARCH_TAG:
self._findTag(token, isNeg, c)
elif type == SEARCH_TYPE:
self._findCardState(token, isNeg)
elif type == SEARCH_NID:
self._findNids(token)
elif type == SEARCH_TEMPLATE:
self._findTemplate(token, isNeg)
elif type == SEARCH_FIELD:
self._findField(token, isNeg)
elif type == SEARCH_MODEL:
self._findModel(token, isNeg)
elif type == SEARCH_DECK:
self._findDeck(token, isNeg)
else:
self._findText(token, isNeg, c)
def _findTag(self, val, neg, c):
if val == "none":
if neg:
t = "tags != ''"
else:
t = "tags = ''"
self.lims['preds'].append(t)
return
extra = "not" if neg else ""
val = val.replace("*", "%")
if not val.startswith("%"):
val = "% " + val
if not val.endswith("%"):
val += " %"
self.lims['args']["_tag_%d" % c] = val
self.lims['preds'].append(
"tags %s like :_tag_%d""" % (extra, c))
def _findCardState(self, val, neg):
cond = None
if val in ("review", "new", "learn"):
if val == "review":
n = 2
elif val == "new":
n = 0
else:
n = 1
cond = "type = %d" % n
elif val == "suspended":
cond = "queue = -1"
elif val == "due":
cond = "(queue = 2 and due <= %d)" % self.col.sched.today
if neg:
cond = "not (%s)" % cond
if cond:
self.lims['preds'].append(cond)
else:
self.lims['valid'] = False
def _findText(self, val, neg, c):
val = val.replace("*", "%")
extra = "not" if neg else ""
if not self.full:
self.lims['args']["_text_%d"%c] = "%"+val+"%"
self.lims['preds'].append("""\
(sfld %s like :_text_%d escape '\\' or
flds %s like :_text_%d escape '\\')""" % (extra, c, extra, c))
else:
# in the future we may want to apply this at the end to speed up
# the case where there are other limits
nids = []
for nid, flds in self.col.db.execute(
"select id, flds from notes"):
if val in stripHTML(flds):
nids.append(nid)
self.lims['preds'].append("n.id in " + ids2str(nids))
def _findNids(self, val):
self.lims['preds'].append("n.id in (%s)" % val)
def _findModel(self, val, isNeg):
extra = "not" if isNeg else ""
ids = []
for m in self.col.models.all():
if m['name'].lower() == val:
ids.append(m['id'])
self.lims['preds'].append("mid %s in %s" % (extra, ids2str(ids)))
def _findDeck(self, val, isNeg):
if val.lower() == "current":
id = self.col.decks.current()['id']
else:
id = self.col.decks.id(val, create=False) or 0
ids = [id] + [a[1] for a in self.col.decks.children(id)]
sids = ids2str(ids)
if not isNeg:
# normal search
self.lims['preds'].append(
"(c.odid in %s or c.did in %s)" % (sids, sids))
else:
# inverted search
self.lims['preds'].append("""
((case c.odid when 0 then 1 else c.odid not in %s end) and c.did not in %s)
""" % (sids, sids))
def _findTemplate(self, val, isNeg):
lims = []
comp = "!=" if isNeg else "="
found = False
try:
num = int(val) - 1
except:
num = None
lims = []
# were we given an ordinal number?
if num is not None:
found = True
self.lims['preds'].append("ord %s %d" % (comp, num))
else:
# search for template names
for m in self.col.models.all():
for t in m['tmpls']:
# template name?
if t['name'].lower() == val.lower():
if m['type'] == MODEL_CLOZE:
# if the user has asked for a cloze card, we want
# to give all ordinals, so we just limit to the
# model instead
lims.append("(mid = %s)" % m['id'])
found = True
else:
lims.append((
"(nid in (select id from notes where mid = %s) "
"and ord %s %d)") % (m['id'], comp, t['ord']))
found = True
if lims:
self.lims['preds'].append("(" + " or ".join(lims) + ")")
self.lims['valid'] = found
def _findField(self, token, isNeg):
field = value = ''
parts = token.split(':', 1);
field = parts[0].lower()
value = "%" + parts[1].replace("*", "%") + "%"
# find models that have that field
mods = {}
for m in self.col.models.all():
for f in m['flds']:
if f['name'].lower() == field:
mods[str(m['id'])] = (m, f['ord'])
if not mods:
# nothing has that field
self.lims['valid'] = False
return
# gather nids
regex = value.replace("_", ".").replace("%", ".*")
nids = []
for (id,mid,flds) in self.col.db.execute("""
select id, mid, flds from notes
where mid in %s and flds like ? escape '\\'""" % (
ids2str(mods.keys())),
"%" if self.full else value):
flds = splitFields(flds)
ord = mods[str(mid)][1]
strg = flds[ord]
if self.full:
strg = stripHTML(strg)
if re.search(regex, strg):
nids.append(id)
extra = "not" if isNeg else ""
self.lims['preds'].append("""
n.mid in %s and n.id %s in %s""" % (
ids2str(mods.keys()), extra, ids2str(nids)))
# Most of this function was written by Marcus
def _parseQuery(self):
tokens = []
res = []
allowedfields = fieldNames(self.col)
def addSearchFieldToken(field, value, isNeg):
if field.lower() in allowedfields:
res.append((field + ':' + value, isNeg, SEARCH_FIELD))
else:
for p in phraselog:
res.append((p['value'], p['is_neg'], p['type']))
# break query into words or phraselog
# an extra space is added so the loop never ends in the middle
# completing a token
for match in re.findall(
r'(-)?\'(([^\'\\]|\\.)*)\'|(-)?"(([^"\\]|\\.)*)"|(-)?([^ ]+)|([ ]+)',
self.query + ' '):
value = (match[1] or match[4] or match[7])
isNeg = (match[0] == '-' or match[3] == '-' or match[6] == '-')
tokens.append({'value': value, 'is_neg': isNeg})
intoken = isNeg = False
field = '' #name of the field for field related commands
phraselog = [] #log of phrases in case potential command is not a commad
for c, token in enumerate(tokens):
doprocess = True # only look for commands when this is true
#prevent cases such as "field" : value as being processed as a command
if len(token['value']) == 0:
if intoken is True and type == SEARCH_FIELD and field:
#case: fieldname: any thing here check for existance of fieldname
addSearchFieldToken(field, '*', isNeg)
phraselog = [] # reset phrases since command is completed
intoken = doprocess = False
if intoken is True:
if type == SEARCH_FIELD and field:
#case: fieldname:"value"
addSearchFieldToken(field, token['value'], isNeg)
intoken = doprocess = False
elif type == SEARCH_FIELD and not field:
#case: "fieldname":"name" or "field" anything
if token['value'].startswith(":") and len(phraselog) == 1:
#we now know a colon is next, so mark it as field
# and keep looking for the value
field = phraselog[0]['value']
parts = token['value'].split(':', 1)
phraselog.append(
{'value': token['value'], 'is_neg': False,
'type': SEARCH_PHRASE})
if parts[1]:
#value is included with the :, so wrap it up
addSearchFieldToken(field, parts[1], isNeg)
intoken = doprocess = False
doprocess = False
else:
#case: "fieldname"string/"fieldname"tag:name
intoken = False
if intoken is False and doprocess is False:
#command has been fully processed
phraselog = [] # reset phraselog, since we used it for a command
if intoken is False:
#include any non-command related phrases in the query
for p in phraselog: res.append(
(p['value'], p['is_neg'], p['type']))
phraselog = []
if intoken is False and doprocess is True:
field = ''
isNeg = token['is_neg']
if token['value'].startswith("tag:"):
token['value'] = token['value'][4:]
type = SEARCH_TAG
elif token['value'].startswith("is:"):
token['value'] = token['value'][3:].lower()
type = SEARCH_TYPE
elif token['value'].startswith("note:"):
token['value'] = token['value'][5:].lower()
type = SEARCH_MODEL
elif token['value'].startswith("deck:"):
token['value'] = token['value'][5:].lower()
type = SEARCH_DECK
elif token['value'].startswith("nid:") and len(token['value']) > 4:
dec = token['value'][4:]
try:
int(dec)
token['value'] = token['value'][4:]
except:
try:
for d in dec.split(","):
int(d)
token['value'] = token['value'][4:]
except:
token['value'] = "0"
type = SEARCH_NID
elif token['value'].startswith("card:"):
token['value'] = token['value'][5:]
type = SEARCH_TEMPLATE
else:
type = SEARCH_FIELD
intoken = True
parts = token['value'].split(':', 1)
phraselog.append(
{'value': token['value'], 'is_neg': isNeg,
'type': SEARCH_PHRASE})
if len(parts) == 2 and parts[0]:
field = parts[0]
if parts[1]:
#simple fieldname:value case -
#no need to look for more data
addSearchFieldToken(field, parts[1], isNeg)
intoken = doprocess = False
if intoken is False: phraselog = []
if intoken is False and doprocess is True:
res.append((token['value'], isNeg, type))
return res
# Find and replace
##########################################################################
def findReplace(col, nids, src, dst, regex=False, field=None, fold=True):
"Find and replace fields in a note."
mmap = {}
if field:
for m in col.models.all():
for f in m['flds']:
if f['name'] == field:
mmap[str(m['id'])] = f['ord']
if not mmap:
return 0
# find and gather replacements
if not regex:
src = re.escape(src)
if fold:
src = "(?i)"+src
regex = re.compile(src)
def repl(str):
return re.sub(regex, dst, str)
d = []
snids = ids2str(nids)
nids = []
for nid, mid, flds in col.db.execute(
"select id, mid, flds from notes where id in "+snids):
origFlds = flds
# does it match?
sflds = splitFields(flds)
if field:
try:
ord = mmap[str(mid)]
sflds[ord] = repl(sflds[ord])
except KeyError:
# note doesn't have that field
continue
else:
for c in range(len(sflds)):
sflds[c] = repl(sflds[c])
flds = joinFields(sflds)
if flds != origFlds:
nids.append(nid)
d.append(dict(nid=nid,flds=flds,u=col.usn(),m=intTime()))
if not d:
return 0
# replace
col.db.executemany("update notes set flds=:flds,mod=:m,usn=:u where id=:nid", d)
col.updateFieldCache(nids)
col.genCards(nids)
return len(d)
# Find duplicates
##########################################################################
def findDuplicates(col, fmids):
data = col.db.all(
"select nid, value from fdata where fmid in %s" %
ids2str(fmids))
vals = {}
for (nid, val) in data:
if not val.strip():
continue
if val not in vals:
vals[val] = [nid]
else:
vals[val].append(nid)
return [(k,v) for (k,v) in vals.items() if len(v) > 1]