merge marcus' searching code

This commit is contained in:
Damien Elmes 2010-10-27 20:54:42 +09:00
parent 34d6efe1df
commit 8c7322c974

View file

@ -56,6 +56,12 @@ SEARCH_TAG = 0
SEARCH_TYPE = 1 SEARCH_TYPE = 1
SEARCH_PHRASE = 2 SEARCH_PHRASE = 2
SEARCH_FID = 3 SEARCH_FID = 3
SEARCH_CARD = 4
SEARCH_DISTINCT = 5
SEARCH_FIELD = 6
SEARCH_FIELD_EXISTS = 7
SEARCH_QA = 8
SEARCH_PHRASE_WB = 9
DECK_VERSION = 48 DECK_VERSION = 48
deckVarsTable = Table( deckVarsTable = Table(
@ -2114,43 +2120,366 @@ where id = :id""", pending)
# Find # Find
########################################################################## ##########################################################################
def allFMFields(self, tolower=False):
fields = []
try:
fields = self.s.column0(
"select distinct name from fieldmodels order by name")
except:
fields = []
if tolower is True:
for i, v in enumerate(fields):
fields[i] = v.lower()
return fields
def _parseQuery(self, query): def _parseQuery(self, query):
tokens = [] tokens = []
res = [] res = []
# break query into words or phrases
for match in re.findall('"(.+?)"|([^ "]+)', query): allowedfields = self.allFMFields(True)
tokens.append(match[0] or match[1]) def addSearchFieldToken(field, value, isNeg, filter):
for c, token in enumerate(tokens): if field.lower() in allowedfields:
isNeg = token.startswith("-") res.append((field + ':' + value, isNeg, SEARCH_FIELD, filter))
if isNeg: elif field in ['question', 'answer']:
token = token[1:] res.append((field + ':' + value, isNeg, SEARCH_QA, filter))
if token.startswith("tag:"):
token = token[4:]
type = SEARCH_TAG
elif token.startswith("is:"):
token = token[3:].lower()
type = SEARCH_TYPE
elif token.startswith("fid:") and len(token) > 4:
dec = token[4:]
try:
int(dec)
token = token[4:]
except:
token = "0"
type = SEARCH_FID
else: else:
type = SEARCH_PHRASE for p in phraselog:
res.append((token, isNeg, type)) res.append((p['value'], p['is_neg'], p['type'], p['filter']))
# break query into words or phraselog
# an extra space is added so the loop never ends in the middle
# completing a token
for match in re.findall(
r'(-)?\'(([^\'\\]|\\.)*)\'|(-)?"(([^"\\]|\\.)*)"|(-)?([^ ]+)|([ ]+)',
query + ' '):
type = ' '
if match[1]: type = "'"
elif match[4]: type = '"'
value = (match[1] or match[4] or match[7])
isNeg = (match[0] == '-' or match[3] == '-' or match[6] == '-')
tokens.append({'type': type, 'value': value, 'is_neg': isNeg,
'filter': ('wb' if type == "'" else 'none')})
intoken = isNeg = False
field = '' #name of the field for field related commands
phraselog = [] #log of phrases in case potential command is not a commad
for c, token in enumerate(tokens):
doprocess = True # only look for commands when this is true
#prevent cases such as "field" : value as being processed as a command
if len(token['value']) == 0:
if intoken is True and type == SEARCH_FIELD and field:
#case: fieldname: any thing here check for existance of fieldname
addSearchFieldToken(field, '*', isNeg, 'none')
phraselog = [] # reset phrases since command is completed
intoken = doprocess = False
if intoken is True:
if type == SEARCH_FIELD_EXISTS:
#case: field:"value"
res.append((token['value'], isNeg, type, 'none'))
intoken = doprocess = False
elif type == SEARCH_FIELD and field:
#case: fieldname:"value"
addSearchFieldToken(
field, token['value'], isNeg, token['filter'])
intoken = doprocess = False
elif type == SEARCH_FIELD and not field:
#case: "fieldname":"name" or "field" anything
if token['value'].startswith(":") and len(phraselog) == 1:
#we now know a colon is next, so mark it as field
# and keep looking for the value
field = phraselog[0][0]
parts = token['value'].split(':', 1)
phraselog.append(
{'value': token, 'is_neg': False,
'type': SEARCH_PHRASE, 'filter': token['filter']})
if parts[1]:
#value is included with the :, so wrap it up
addSearchFieldToken(field, parts[1], isNeg, 'none')
intoken = doprocess = False
doprocess = False
else:
#case: "fieldname"string/"fieldname"tag:name
intoken = False
if intoken is False and doprocess is False:
#command has been fully processed
phraselog = [] # reset phraselog, since we used it for a command
if intoken is False:
#include any non-command related phrases in the query
for p in phraselog: res.append(
(p['value'], p['is_neg'], p['type'], p['filter']))
phraselog = []
if intoken is False and doprocess is True:
field = ''
isNeg = token['is_neg']
if token['value'].startswith("tag:"):
token['value'] = token['value'][4:]
type = SEARCH_TAG
elif token['value'].startswith("is:"):
token['value'] = token['value'][3:].lower()
type = SEARCH_TYPE
elif token['value'].startswith("fid:") and len(token) > 4:
dec = token['value'][4:]
try:
int(dec)
token['value'] = token['value'][4:]
except:
token['value'] = "0"
type = SEARCH_FID
elif token['value'].startswith("card:"):
token['value'] = token['value'][5:]
type = SEARCH_CARD
elif token['value'].startswith("show:"):
token['value'] = token['value'][5:].lower()
type = SEARCH_DISTINCT
elif token['value'].startswith("field:"):
type = SEARCH_FIELD_EXISTS
parts = token['value'][6:].split(':', 1)
field = parts[0]
if len(parts) == 1 and parts[0]:
token['value'] = parts[0]
elif len(parts) == 1 and not parts[0]:
intoken = True
else:
type = SEARCH_FIELD
intoken = True
parts = token['value'].split(':', 1)
phraselog.append(
{'value': token['value'], 'is_neg': isNeg,
'type': SEARCH_PHRASE, 'filter': token['filter']})
if len(parts) == 2 and parts[0]:
field = parts[0]
if parts[1]:
#simple fieldname:value case - no need to look for more data
addSearchFieldToken(field, parts[1], isNeg, 'none')
intoken = doprocess = False
if intoken is False: phraselog = []
if intoken is False and doprocess is True:
res.append((token['value'], isNeg, type, token['filter']))
return res return res
def findCards(self, query):
(q, cmquery, showdistinct, filters, args) = self.findCardsWhere(query)
(factIdList, cardIdList) = self.findCardsMatchingFilters(filters)
query = "select id from cards"
hasWhere = False
if q:
query += " where " + q
hasWhere = True
if cmquery['pos'] or cmquery['neg']:
if hasWhere is False:
query += " where "
hasWhere = True
else: query += " and "
if cmquery['pos']:
query += (" factId in(select distinct factId from cards "+
"where id in (" + cmquery['pos'] + ")) ")
query += " and id in(" + cmquery['pos'] + ") "
if cmquery['neg']:
query += (" factId not in(select distinct factId from "+
"cards where id in (" + cmquery['neg'] + ")) ")
if factIdList is not None:
if hasWhere is False:
query += " where "
hasWhere = True
else: query += " and "
query += " factId IN %s" % ids2str(factIdList)
if cardIdList is not None:
if hasWhere is False:
query += " where "
hasWhere = True
else: query += " and "
query += " id IN %s" % ids2str(cardIdList)
if showdistinct:
query += " group by factId"
return self.s.column0(query, **args)
def findCardsWhere(self, query):
(tquery, fquery, qquery, fidquery, cmquery, sfquery, qaquery,
showdistinct, filters, args) = self._findCards(query)
q = ""
x = []
if tquery:
x.append(" id in (%s)" % tquery)
if fquery:
x.append(" factId in (%s)" % fquery)
if qquery:
x.append(" id in (%s)" % qquery)
if fidquery:
x.append(" id in (%s)" % fidquery)
if sfquery:
x.append(" factId in (%s)" % sfquery)
if qaquery:
x.append(" id in (%s)" % qaquery)
if x:
q += " and ".join(x)
return q, cmquery, showdistinct, filters, args
def findCardsMatchingFilters(self, filters):
factFilters = []
fieldFilters = {}
cardFilters = {}
factFilterMatches = []
fieldFilterMatches = []
cardFilterMatches = []
if (len(filters) > 0):
for filter in filters:
if filter['scope'] == 'fact':
regexp = re.compile(
r'\b' + re.escape(filter['value']) + r'\b', flags=re.I)
factFilters.append(
{'value': filter['value'], 'regexp': regexp,
'is_neg': filter['is_neg']})
if filter['scope'] == 'field':
fieldName = filter['field'].lower()
if (fieldName in fieldFilters) is False:
fieldFilters[fieldName] = []
regexp = re.compile(
r'\b' + re.escape(filter['value']) + r'\b', flags=re.I)
fieldFilters[fieldName].append(
{'value': filter['value'], 'regexp': regexp,
'is_neg': filter['is_neg']})
if filter['scope'] == 'card':
fieldName = filter['field'].lower()
if (fieldName in cardFilters) is False:
cardFilters[fieldName] = []
regexp = re.compile(r'\b' + re.escape(filter['value']) + r'\b', flags=re.I)
cardFilters[fieldName].append(
{'value': filter['value'], 'regexp': regexp,
'is_neg': filter['is_neg']})
if len(factFilters) > 0:
fquery = ''
args = {}
for filter in factFilters:
c = len(args)
if fquery:
if filter['is_neg']: fquery += " except "
else: fquery += " intersect "
elif filter['is_neg']: fquery += "select id from fields except "
value = filter['value'].replace("*", "%")
args["_ff_%d" % c] = "%"+value+"%"
fquery += (
"select id from fields where value like "+
":_ff_%d escape '\\'" % c)
rows = self.s.execute(
'select factId, value from fields where id in (' +
fquery + ')', args)
while (1):
row = rows.fetchone()
if row is None: break
doesMatch = False
for filter in factFilters:
res = filter['regexp'].search(row[1])
if ((filter['is_neg'] is False and res) or
(filter['is_neg'] is True and res is None)):
factFilterMatches.append(row[0])
if len(fieldFilters) > 0:
sfquery = ''
args = {}
for field, filters in fieldFilters.iteritems():
for filter in filters:
c = len(args)
if sfquery:
if filter['is_neg']: sfquery += " except "
else: sfquery += " intersect "
elif filter['is_neg']: sfquery += "select id from fields except "
field = field.replace("*", "%")
value = filter['value'].replace("*", "%")
args["_ff_%d" % c] = "%"+value+"%"
ids = self.s.column0(
"select id from fieldmodels where name like "+
":field escape '\\'", field=field)
sfquery += ("select id from fields where "+
"fieldModelId in %s and value like "+
":_ff_%d escape '\\'") % (ids2str(ids), c)
rows = self.s.execute(
'select f.factId, f.value, fm.name from fields as f '+
'left join fieldmodels as fm ON (f.fieldModelId = '+
'fm.id) where f.id in (' + sfquery + ')', args)
while (1):
row = rows.fetchone()
if row is None: break
field = row[2].lower()
doesMatch = False
if field in fieldFilters:
for filter in fieldFilters[field]:
res = filter['regexp'].search(row[1])
if ((filter['is_neg'] is False and res) or
(filter['is_neg'] is True and res is None)):
fieldFilterMatches.append(row[0])
if len(cardFilters) > 0:
qaquery = ''
args = {}
for field, filters in cardFilters.iteritems():
for filter in filters:
c = len(args)
if qaquery:
if filter['is_neg']: qaquery += " except "
else: qaquery += " intersect "
elif filter['is_neg']: qaquery += "select id from cards except "
value = value.replace("*", "%")
args["_ff_%d" % c] = "%"+value+"%"
if field == 'question':
qaquery += "select id from cards where question "
qaquery += "like :_ff_%d escape '\\'" % c
else:
qaquery += "select id from cards where answer "
qaquery += "like :_ff_%d escape '\\'" % c
rows = self.s.execute(
'select id, question, answer from cards where id IN (' +
qaquery + ')', args)
while (1):
row = rows.fetchone()
if row is None: break
doesMatch = False
if field in cardFilters:
rowValue = row[1] if field == 'question' else row[2]
for filter in cardFilters[field]:
res = filter['regexp'].search(rowValue)
if ((filter['is_neg'] is False and res) or
(filter['is_neg'] is True and res is None)):
cardFilterMatches.append(row[0])
factIds = None
if len(factFilters) > 0 or len(fieldFilters) > 0:
factIds = []
factIds.extend(factFilterMatches)
factIds.extend(fieldFilterMatches)
cardIds = None
if len(cardFilters) > 0:
cardIds = []
cardIds.extend(cardFilterMatches)
return (factIds, cardIds)
def _findCards(self, query): def _findCards(self, query):
"Find facts matching QUERY." "Find facts matching QUERY."
tquery = "" tquery = ""
fquery = "" fquery = ""
qquery = "" qquery = ""
fidquery = "" fidquery = ""
cmquery = { 'pos': '', 'neg': '' }
sfquery = qaquery = ""
showdistinct = False
filters = []
args = {} args = {}
for c, (token, isNeg, type) in enumerate(self._parseQuery(query)): for c, (token, isNeg, type, filter) in enumerate(self._parseQuery(query)):
if type == SEARCH_TAG: if type == SEARCH_TAG:
# a tag # a tag
if tquery: if tquery:
@ -2161,16 +2490,11 @@ where id = :id""", pending)
elif isNeg: elif isNeg:
tquery += "select id from cards except " tquery += "select id from cards except "
if token == "none": if token == "none":
tquery += """ tquery += """select cards.id from cards, facts where facts.tags = '' and cards.factId = facts.id """
select cards.id from cards, facts where facts.tags = ''
and cards.factId = facts.id """
else: else:
token = token.replace("*", "%") token = token.replace("*", "%")
ids = self.s.column0( ids = self.s.column0("select id from tags where tag like :tag escape '\\'", tag=token)
"select id from tags where tag like :tag", tag=token) tquery += """select cardId from cardTags where cardTags.tagId in %s""" % ids2str(ids)
tquery += """
select cardId from cardTags where
cardTags.tagId in %s""" % ids2str(ids)
elif type == SEARCH_TYPE: elif type == SEARCH_TYPE:
if qquery: if qquery:
if isNeg: if isNeg:
@ -2189,15 +2513,19 @@ cardTags.tagId in %s""" % ids2str(ids)
qquery += "select id from cards where type = %d" % n qquery += "select id from cards where type = %d" % n
elif token == "delayed": elif token == "delayed":
qquery += ("select id from cards where " qquery += ("select id from cards where "
"due < %d and combinedDue > %d and " "due < %d and isDue = 0 and "
"type in (0,1,2)") % ( "priority in (1,2,3,4)") % time.time()
self.dueCutoff, self.dueCutoff)
elif token == "suspended": elif token == "suspended":
qquery += ("select id from cards where " qquery += ("select id from cards where "
"priority = -3") "priority = -3")
elif token == "inactive":
qquery += ("select id from cards where "
"priority = 0")
elif token == "leech":
qquery += ("select id from cards where noCount >= (select value from deckvars where key = \"leechFails\")")
else: # due else: # due
qquery += ("select id from cards where " qquery += ("select id from cards where "
"type in (0,1) and combinedDue < %d") % self.dueCutoff "type in (0,1) and isDue = 1")
elif type == SEARCH_FID: elif type == SEARCH_FID:
if fidquery: if fidquery:
if isNeg: if isNeg:
@ -2207,44 +2535,83 @@ cardTags.tagId in %s""" % ids2str(ids)
elif isNeg: elif isNeg:
fidquery += "select id from cards except " fidquery += "select id from cards except "
fidquery += "select id from cards where factId = %s" % token fidquery += "select id from cards where factId = %s" % token
else: elif type == SEARCH_CARD:
# a field
if fquery:
if isNeg:
fquery += " except "
else:
fquery += " intersect "
elif isNeg:
fquery += "select id from facts except "
token = token.replace("*", "%") token = token.replace("*", "%")
token = token.replace("?", "_") ids = self.s.column0("select id from tags where tag like :tag escape '\\'", tag=token)
args["_ff_%d" % c] = "%"+token+"%" if isNeg:
q = "select factId from fields where value like :_ff_%d" % c if cmquery['neg']:
fquery += q cmquery['neg'] += " intersect "
return (tquery, fquery, qquery, fidquery, args) cmquery['neg'] += """select cardId from cardTags where src = 2 and cardTags.tagId in %s""" % ids2str(ids)
else:
if cmquery['pos']:
cmquery['pos'] += " intersect "
cmquery['pos'] += """select cardId from cardTags where src = 2 and cardTags.tagId in %s""" % ids2str(ids)
elif type == SEARCH_FIELD or type == SEARCH_FIELD_EXISTS:
field = value = ''
if type == SEARCH_FIELD:
parts = token.split(':', 1);
if len(parts) == 2:
field = parts[0]
value = parts[1]
elif type == SEARCH_FIELD_EXISTS:
field = token
value = '*'
if (type == SEARCH_FIELD and filter != 'none'):
if field and value: filters.append({'scope': 'field', 'type': filter, 'field': field, 'value': value, 'is_neg': isNeg})
else:
if field and value:
if sfquery:
if isNeg:
sfquery += " except "
else:
sfquery += " intersect "
elif isNeg:
sfquery += "select id from facts except "
field = field.replace("*", "%")
value = value.replace("*", "%")
args["_ff_%d" % c] = "%"+value+"%"
ids = self.s.column0("select id from fieldmodels where name like :field escape '\\'", field=field)
sfquery += "select factId from fields where fieldModelId in %s and value like :_ff_%d escape '\\'" % (ids2str(ids), c)
elif type == SEARCH_QA:
field = value = ''
parts = token.split(':', 1);
if len(parts) == 2:
field = parts[0]
value = parts[1]
if (filter != 'none'):
if field and value: filters.append({'scope': 'card', 'type': filter, 'field': field, 'value': value, 'is_neg': isNeg})
else:
if field and value:
if qaquery:
if isNeg:
qaquery += " except "
else:
qaquery += " intersect "
elif isNeg:
qaquery += "select id from cards except "
value = value.replace("*", "%")
args["_ff_%d" % c] = "%"+value+"%"
def findCardsWhere(self, query): if field == 'question': qaquery += "select id from cards where question like :_ff_%d escape '\\'" % c
(tquery, fquery, qquery, fidquery, args) = self._findCards(query) else: qaquery += "select id from cards where answer like :_ff_%d escape '\\'" % c
q = "" elif type == SEARCH_DISTINCT:
x = [] if isNeg is False: showdistinct = True if token == "one" else False
if tquery: else: showdistinct = False if token == "one" else True
x.append(" id in (%s)" % tquery) else:
if fquery: if (filter != 'none'):
x.append(" factId in (%s)" % fquery) filters.append({'scope': 'fact', 'type': filter, 'value': token, 'is_neg': isNeg})
if qquery: else:
x.append(" id in (%s)" % qquery) if fquery:
if fidquery: if isNeg:
x.append(" id in (%s)" % fidquery) fquery += " except "
if x: else:
q += " and ".join(x) fquery += " intersect "
return q, args elif isNeg:
fquery += "select id from facts except "
def findCards(self, query): token = token.replace("*", "%")
q, args = self.findCardsWhere(query) args["_ff_%d" % c] = "%"+token+"%"
query = "select id from cards" fquery += "select factId from fields where value like :_ff_%d escape '\\'" % c
if q: return (tquery, fquery, qquery, fidquery, cmquery, sfquery, qaquery, showdistinct, filters, args)
query += " where " + q
return self.s.column0(query, **args)
# Find and replace # Find and replace
########################################################################## ##########################################################################