From 8c7322c9747d16278cd34095b804a70ee4a3f791 Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Wed, 27 Oct 2010 20:54:42 +0900 Subject: [PATCH] merge marcus' searching code --- anki/deck.py | 511 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 439 insertions(+), 72 deletions(-) diff --git a/anki/deck.py b/anki/deck.py index 7224fbeec..2f107a09e 100644 --- a/anki/deck.py +++ b/anki/deck.py @@ -56,6 +56,12 @@ SEARCH_TAG = 0 SEARCH_TYPE = 1 SEARCH_PHRASE = 2 SEARCH_FID = 3 +SEARCH_CARD = 4 +SEARCH_DISTINCT = 5 +SEARCH_FIELD = 6 +SEARCH_FIELD_EXISTS = 7 +SEARCH_QA = 8 +SEARCH_PHRASE_WB = 9 DECK_VERSION = 48 deckVarsTable = Table( @@ -2114,43 +2120,366 @@ where id = :id""", pending) # Find ########################################################################## + def allFMFields(self, tolower=False): + fields = [] + try: + fields = self.s.column0( + "select distinct name from fieldmodels order by name") + except: + fields = [] + if tolower is True: + for i, v in enumerate(fields): + fields[i] = v.lower() + return fields + def _parseQuery(self, query): tokens = [] res = [] - # break query into words or phrases - for match in re.findall('"(.+?)"|([^ "]+)', query): - tokens.append(match[0] or match[1]) - for c, token in enumerate(tokens): - isNeg = token.startswith("-") - if isNeg: - token = token[1:] - if token.startswith("tag:"): - token = token[4:] - type = SEARCH_TAG - elif token.startswith("is:"): - token = token[3:].lower() - type = SEARCH_TYPE - elif token.startswith("fid:") and len(token) > 4: - dec = token[4:] - try: - int(dec) - token = token[4:] - except: - token = "0" - type = SEARCH_FID + + allowedfields = self.allFMFields(True) + def addSearchFieldToken(field, value, isNeg, filter): + if field.lower() in allowedfields: + res.append((field + ':' + value, isNeg, SEARCH_FIELD, filter)) + elif field in ['question', 'answer']: + res.append((field + ':' + value, isNeg, SEARCH_QA, filter)) else: - type = SEARCH_PHRASE - res.append((token, isNeg, type)) + for p in phraselog: + res.append((p['value'], p['is_neg'], p['type'], p['filter'])) + # break query into words or phraselog + # an extra space is added so the loop never ends in the middle + # completing a token + for match in re.findall( + r'(-)?\'(([^\'\\]|\\.)*)\'|(-)?"(([^"\\]|\\.)*)"|(-)?([^ ]+)|([ ]+)', + query + ' '): + type = ' ' + if match[1]: type = "'" + elif match[4]: type = '"' + + value = (match[1] or match[4] or match[7]) + isNeg = (match[0] == '-' or match[3] == '-' or match[6] == '-') + + tokens.append({'type': type, 'value': value, 'is_neg': isNeg, + 'filter': ('wb' if type == "'" else 'none')}) + intoken = isNeg = False + field = '' #name of the field for field related commands + phraselog = [] #log of phrases in case potential command is not a commad + for c, token in enumerate(tokens): + doprocess = True # only look for commands when this is true + #prevent cases such as "field" : value as being processed as a command + if len(token['value']) == 0: + if intoken is True and type == SEARCH_FIELD and field: + #case: fieldname: any thing here check for existance of fieldname + addSearchFieldToken(field, '*', isNeg, 'none') + phraselog = [] # reset phrases since command is completed + intoken = doprocess = False + if intoken is True: + if type == SEARCH_FIELD_EXISTS: + #case: field:"value" + res.append((token['value'], isNeg, type, 'none')) + intoken = doprocess = False + elif type == SEARCH_FIELD and field: + #case: fieldname:"value" + addSearchFieldToken( + field, token['value'], isNeg, token['filter']) + intoken = doprocess = False + + elif type == SEARCH_FIELD and not field: + #case: "fieldname":"name" or "field" anything + if token['value'].startswith(":") and len(phraselog) == 1: + #we now know a colon is next, so mark it as field + # and keep looking for the value + field = phraselog[0][0] + parts = token['value'].split(':', 1) + phraselog.append( + {'value': token, 'is_neg': False, + 'type': SEARCH_PHRASE, 'filter': token['filter']}) + if parts[1]: + #value is included with the :, so wrap it up + addSearchFieldToken(field, parts[1], isNeg, 'none') + intoken = doprocess = False + doprocess = False + else: + #case: "fieldname"string/"fieldname"tag:name + intoken = False + if intoken is False and doprocess is False: + #command has been fully processed + phraselog = [] # reset phraselog, since we used it for a command + if intoken is False: + #include any non-command related phrases in the query + for p in phraselog: res.append( + (p['value'], p['is_neg'], p['type'], p['filter'])) + phraselog = [] + if intoken is False and doprocess is True: + field = '' + isNeg = token['is_neg'] + if token['value'].startswith("tag:"): + token['value'] = token['value'][4:] + type = SEARCH_TAG + elif token['value'].startswith("is:"): + token['value'] = token['value'][3:].lower() + type = SEARCH_TYPE + elif token['value'].startswith("fid:") and len(token) > 4: + dec = token['value'][4:] + try: + int(dec) + token['value'] = token['value'][4:] + except: + token['value'] = "0" + type = SEARCH_FID + elif token['value'].startswith("card:"): + token['value'] = token['value'][5:] + type = SEARCH_CARD + elif token['value'].startswith("show:"): + token['value'] = token['value'][5:].lower() + type = SEARCH_DISTINCT + elif token['value'].startswith("field:"): + type = SEARCH_FIELD_EXISTS + parts = token['value'][6:].split(':', 1) + field = parts[0] + if len(parts) == 1 and parts[0]: + token['value'] = parts[0] + elif len(parts) == 1 and not parts[0]: + intoken = True + else: + type = SEARCH_FIELD + intoken = True + parts = token['value'].split(':', 1) + + phraselog.append( + {'value': token['value'], 'is_neg': isNeg, + 'type': SEARCH_PHRASE, 'filter': token['filter']}) + if len(parts) == 2 and parts[0]: + field = parts[0] + if parts[1]: + #simple fieldname:value case - no need to look for more data + addSearchFieldToken(field, parts[1], isNeg, 'none') + intoken = doprocess = False + + if intoken is False: phraselog = [] + if intoken is False and doprocess is True: + res.append((token['value'], isNeg, type, token['filter'])) return res + def findCards(self, query): + (q, cmquery, showdistinct, filters, args) = self.findCardsWhere(query) + (factIdList, cardIdList) = self.findCardsMatchingFilters(filters) + query = "select id from cards" + hasWhere = False + if q: + query += " where " + q + hasWhere = True + if cmquery['pos'] or cmquery['neg']: + if hasWhere is False: + query += " where " + hasWhere = True + else: query += " and " + if cmquery['pos']: + query += (" factId in(select distinct factId from cards "+ + "where id in (" + cmquery['pos'] + ")) ") + query += " and id in(" + cmquery['pos'] + ") " + if cmquery['neg']: + query += (" factId not in(select distinct factId from "+ + "cards where id in (" + cmquery['neg'] + ")) ") + if factIdList is not None: + if hasWhere is False: + query += " where " + hasWhere = True + else: query += " and " + query += " factId IN %s" % ids2str(factIdList) + if cardIdList is not None: + if hasWhere is False: + query += " where " + hasWhere = True + else: query += " and " + query += " id IN %s" % ids2str(cardIdList) + if showdistinct: + query += " group by factId" + return self.s.column0(query, **args) + + def findCardsWhere(self, query): + (tquery, fquery, qquery, fidquery, cmquery, sfquery, qaquery, + showdistinct, filters, args) = self._findCards(query) + q = "" + x = [] + if tquery: + x.append(" id in (%s)" % tquery) + if fquery: + x.append(" factId in (%s)" % fquery) + if qquery: + x.append(" id in (%s)" % qquery) + if fidquery: + x.append(" id in (%s)" % fidquery) + if sfquery: + x.append(" factId in (%s)" % sfquery) + if qaquery: + x.append(" id in (%s)" % qaquery) + if x: + q += " and ".join(x) + return q, cmquery, showdistinct, filters, args + + def findCardsMatchingFilters(self, filters): + factFilters = [] + fieldFilters = {} + cardFilters = {} + + factFilterMatches = [] + fieldFilterMatches = [] + cardFilterMatches = [] + + if (len(filters) > 0): + for filter in filters: + if filter['scope'] == 'fact': + regexp = re.compile( + r'\b' + re.escape(filter['value']) + r'\b', flags=re.I) + factFilters.append( + {'value': filter['value'], 'regexp': regexp, + 'is_neg': filter['is_neg']}) + if filter['scope'] == 'field': + fieldName = filter['field'].lower() + if (fieldName in fieldFilters) is False: + fieldFilters[fieldName] = [] + regexp = re.compile( + r'\b' + re.escape(filter['value']) + r'\b', flags=re.I) + fieldFilters[fieldName].append( + {'value': filter['value'], 'regexp': regexp, + 'is_neg': filter['is_neg']}) + if filter['scope'] == 'card': + fieldName = filter['field'].lower() + if (fieldName in cardFilters) is False: + cardFilters[fieldName] = [] + regexp = re.compile(r'\b' + re.escape(filter['value']) + r'\b', flags=re.I) + cardFilters[fieldName].append( + {'value': filter['value'], 'regexp': regexp, + 'is_neg': filter['is_neg']}) + + if len(factFilters) > 0: + fquery = '' + args = {} + for filter in factFilters: + c = len(args) + if fquery: + if filter['is_neg']: fquery += " except " + else: fquery += " intersect " + elif filter['is_neg']: fquery += "select id from fields except " + + value = filter['value'].replace("*", "%") + args["_ff_%d" % c] = "%"+value+"%" + + fquery += ( + "select id from fields where value like "+ + ":_ff_%d escape '\\'" % c) + + rows = self.s.execute( + 'select factId, value from fields where id in (' + + fquery + ')', args) + while (1): + row = rows.fetchone() + if row is None: break + doesMatch = False + for filter in factFilters: + res = filter['regexp'].search(row[1]) + if ((filter['is_neg'] is False and res) or + (filter['is_neg'] is True and res is None)): + factFilterMatches.append(row[0]) + + if len(fieldFilters) > 0: + sfquery = '' + args = {} + for field, filters in fieldFilters.iteritems(): + for filter in filters: + c = len(args) + if sfquery: + if filter['is_neg']: sfquery += " except " + else: sfquery += " intersect " + elif filter['is_neg']: sfquery += "select id from fields except " + field = field.replace("*", "%") + value = filter['value'].replace("*", "%") + args["_ff_%d" % c] = "%"+value+"%" + + ids = self.s.column0( + "select id from fieldmodels where name like "+ + ":field escape '\\'", field=field) + sfquery += ("select id from fields where "+ + "fieldModelId in %s and value like "+ + ":_ff_%d escape '\\'") % (ids2str(ids), c) + + rows = self.s.execute( + 'select f.factId, f.value, fm.name from fields as f '+ + 'left join fieldmodels as fm ON (f.fieldModelId = '+ + 'fm.id) where f.id in (' + sfquery + ')', args) + while (1): + row = rows.fetchone() + if row is None: break + field = row[2].lower() + doesMatch = False + if field in fieldFilters: + for filter in fieldFilters[field]: + res = filter['regexp'].search(row[1]) + if ((filter['is_neg'] is False and res) or + (filter['is_neg'] is True and res is None)): + fieldFilterMatches.append(row[0]) + + + if len(cardFilters) > 0: + qaquery = '' + args = {} + for field, filters in cardFilters.iteritems(): + for filter in filters: + c = len(args) + if qaquery: + if filter['is_neg']: qaquery += " except " + else: qaquery += " intersect " + elif filter['is_neg']: qaquery += "select id from cards except " + value = value.replace("*", "%") + args["_ff_%d" % c] = "%"+value+"%" + + if field == 'question': + qaquery += "select id from cards where question " + qaquery += "like :_ff_%d escape '\\'" % c + else: + qaquery += "select id from cards where answer " + qaquery += "like :_ff_%d escape '\\'" % c + + rows = self.s.execute( + 'select id, question, answer from cards where id IN (' + + qaquery + ')', args) + while (1): + row = rows.fetchone() + if row is None: break + doesMatch = False + if field in cardFilters: + rowValue = row[1] if field == 'question' else row[2] + for filter in cardFilters[field]: + res = filter['regexp'].search(rowValue) + if ((filter['is_neg'] is False and res) or + (filter['is_neg'] is True and res is None)): + cardFilterMatches.append(row[0]) + + factIds = None + if len(factFilters) > 0 or len(fieldFilters) > 0: + factIds = [] + factIds.extend(factFilterMatches) + factIds.extend(fieldFilterMatches) + + cardIds = None + if len(cardFilters) > 0: + cardIds = [] + cardIds.extend(cardFilterMatches) + + return (factIds, cardIds) + def _findCards(self, query): "Find facts matching QUERY." tquery = "" fquery = "" qquery = "" fidquery = "" + cmquery = { 'pos': '', 'neg': '' } + sfquery = qaquery = "" + showdistinct = False + filters = [] args = {} - for c, (token, isNeg, type) in enumerate(self._parseQuery(query)): + for c, (token, isNeg, type, filter) in enumerate(self._parseQuery(query)): if type == SEARCH_TAG: # a tag if tquery: @@ -2161,16 +2490,11 @@ where id = :id""", pending) elif isNeg: tquery += "select id from cards except " if token == "none": - tquery += """ -select cards.id from cards, facts where facts.tags = '' -and cards.factId = facts.id """ + tquery += """select cards.id from cards, facts where facts.tags = '' and cards.factId = facts.id """ else: token = token.replace("*", "%") - ids = self.s.column0( - "select id from tags where tag like :tag", tag=token) - tquery += """ -select cardId from cardTags where -cardTags.tagId in %s""" % ids2str(ids) + ids = self.s.column0("select id from tags where tag like :tag escape '\\'", tag=token) + tquery += """select cardId from cardTags where cardTags.tagId in %s""" % ids2str(ids) elif type == SEARCH_TYPE: if qquery: if isNeg: @@ -2189,15 +2513,19 @@ cardTags.tagId in %s""" % ids2str(ids) qquery += "select id from cards where type = %d" % n elif token == "delayed": qquery += ("select id from cards where " - "due < %d and combinedDue > %d and " - "type in (0,1,2)") % ( - self.dueCutoff, self.dueCutoff) + "due < %d and isDue = 0 and " + "priority in (1,2,3,4)") % time.time() elif token == "suspended": qquery += ("select id from cards where " "priority = -3") + elif token == "inactive": + qquery += ("select id from cards where " + "priority = 0") + elif token == "leech": + qquery += ("select id from cards where noCount >= (select value from deckvars where key = \"leechFails\")") else: # due qquery += ("select id from cards where " - "type in (0,1) and combinedDue < %d") % self.dueCutoff + "type in (0,1) and isDue = 1") elif type == SEARCH_FID: if fidquery: if isNeg: @@ -2207,44 +2535,83 @@ cardTags.tagId in %s""" % ids2str(ids) elif isNeg: fidquery += "select id from cards except " fidquery += "select id from cards where factId = %s" % token - else: - # a field - if fquery: - if isNeg: - fquery += " except " - else: - fquery += " intersect " - elif isNeg: - fquery += "select id from facts except " + elif type == SEARCH_CARD: token = token.replace("*", "%") - token = token.replace("?", "_") - args["_ff_%d" % c] = "%"+token+"%" - q = "select factId from fields where value like :_ff_%d" % c - fquery += q - return (tquery, fquery, qquery, fidquery, args) + ids = self.s.column0("select id from tags where tag like :tag escape '\\'", tag=token) + if isNeg: + if cmquery['neg']: + cmquery['neg'] += " intersect " + cmquery['neg'] += """select cardId from cardTags where src = 2 and cardTags.tagId in %s""" % ids2str(ids) + else: + if cmquery['pos']: + cmquery['pos'] += " intersect " + cmquery['pos'] += """select cardId from cardTags where src = 2 and cardTags.tagId in %s""" % ids2str(ids) + elif type == SEARCH_FIELD or type == SEARCH_FIELD_EXISTS: + field = value = '' + if type == SEARCH_FIELD: + parts = token.split(':', 1); + if len(parts) == 2: + field = parts[0] + value = parts[1] + elif type == SEARCH_FIELD_EXISTS: + field = token + value = '*' + if (type == SEARCH_FIELD and filter != 'none'): + if field and value: filters.append({'scope': 'field', 'type': filter, 'field': field, 'value': value, 'is_neg': isNeg}) + else: + if field and value: + if sfquery: + if isNeg: + sfquery += " except " + else: + sfquery += " intersect " + elif isNeg: + sfquery += "select id from facts except " + field = field.replace("*", "%") + value = value.replace("*", "%") + args["_ff_%d" % c] = "%"+value+"%" + ids = self.s.column0("select id from fieldmodels where name like :field escape '\\'", field=field) + sfquery += "select factId from fields where fieldModelId in %s and value like :_ff_%d escape '\\'" % (ids2str(ids), c) + elif type == SEARCH_QA: + field = value = '' + parts = token.split(':', 1); + if len(parts) == 2: + field = parts[0] + value = parts[1] + if (filter != 'none'): + if field and value: filters.append({'scope': 'card', 'type': filter, 'field': field, 'value': value, 'is_neg': isNeg}) + else: + if field and value: + if qaquery: + if isNeg: + qaquery += " except " + else: + qaquery += " intersect " + elif isNeg: + qaquery += "select id from cards except " + value = value.replace("*", "%") + args["_ff_%d" % c] = "%"+value+"%" - def findCardsWhere(self, query): - (tquery, fquery, qquery, fidquery, args) = self._findCards(query) - q = "" - x = [] - if tquery: - x.append(" id in (%s)" % tquery) - if fquery: - x.append(" factId in (%s)" % fquery) - if qquery: - x.append(" id in (%s)" % qquery) - if fidquery: - x.append(" id in (%s)" % fidquery) - if x: - q += " and ".join(x) - return q, args - - def findCards(self, query): - q, args = self.findCardsWhere(query) - query = "select id from cards" - if q: - query += " where " + q - return self.s.column0(query, **args) + if field == 'question': qaquery += "select id from cards where question like :_ff_%d escape '\\'" % c + else: qaquery += "select id from cards where answer like :_ff_%d escape '\\'" % c + elif type == SEARCH_DISTINCT: + if isNeg is False: showdistinct = True if token == "one" else False + else: showdistinct = False if token == "one" else True + else: + if (filter != 'none'): + filters.append({'scope': 'fact', 'type': filter, 'value': token, 'is_neg': isNeg}) + else: + if fquery: + if isNeg: + fquery += " except " + else: + fquery += " intersect " + elif isNeg: + fquery += "select id from facts except " + token = token.replace("*", "%") + args["_ff_%d" % c] = "%"+token+"%" + fquery += "select factId from fields where value like :_ff_%d escape '\\'" % c + return (tquery, fquery, qquery, fidquery, cmquery, sfquery, qaquery, showdistinct, filters, args) # Find and replace ##########################################################################