add a field cache for searching without html

This commit is contained in:
Damien Elmes 2010-10-27 19:00:49 +09:00
parent 9f9841cb22
commit 8ce0ff5b8b
3 changed files with 31 additions and 4 deletions

View file

@ -56,7 +56,7 @@ SEARCH_TAG = 0
SEARCH_TYPE = 1
SEARCH_PHRASE = 2
SEARCH_FID = 3
DECK_VERSION = 47
DECK_VERSION = 48
deckVarsTable = Table(
'deckVars', metadata,
@ -1829,13 +1829,28 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid])
for (cid, cmid, fid, mid) in ids]
if pend:
# update q/a
self.s.execute("""
update cards set
question = :question, answer = :answer
%s
where id = :id""" % mod, pend)
# update fields cache
self.updateFieldCache(facts.keys())
self.flushMod()
def updateFieldCache(self, fids):
    "Refresh the HTML-stripped field text stored on each fact in FIDS."
    from anki.utils import stripHTMLMedia
    # one row per fact: (factId, its field values joined with spaces)
    query = ("select factId, group_concat(value, ' ') from fields "
             "where factId in %s group by factId") % ids2str(fids)
    rows = self.s.all(query)
    # one parameter dict per fact for the update statement below
    params = [{'id': factId, 'v': stripHTMLMedia(joined)}
              for (factId, joined) in rows]
    # the stripped text is written to the facts.spaceUntil column
    self.s.statements(
        "update facts set spaceUntil=:v where id=:id", params)
def rebuildCardOrdinals(self, ids):
"Update all card models in IDS. Caller must update model modtime."
self.s.flush()
@ -3604,6 +3619,10 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
deck.s.execute("analyze")
deck.version = 47
deck.s.commit()
if deck.version < 48:
deck.updateFieldCache(deck.s.column0("select id from facts"))
deck.version = 48
deck.s.commit()
# executing a pragma here is very slow on large decks, so we store
# our own record
if not deck.getInt("pageSize") == 4096:

View file

@ -12,7 +12,7 @@ import time
from anki.db import *
from anki.errors import *
from anki.models import Model, FieldModel, fieldModelsTable, formatQA
from anki.utils import genID
from anki.utils import genID, stripHTMLMedia
from anki.hooks import runHook
# Fields in a fact
@ -56,8 +56,9 @@ factsTable = Table(
Column('created', Float, nullable=False, default=time.time),
Column('modified', Float, nullable=False, default=time.time),
Column('tags', UnicodeText, nullable=False, default=u""),
# the following two fields are obsolete and now stored in cards table
Column('spaceUntil', Float, nullable=False, default=0),
# spaceUntil is reused as a html-stripped cache of the fields
Column('spaceUntil', UnicodeText, nullable=False, default=u""),
# obsolete
Column('lastCardId', Integer, ForeignKey(
"cards.id", use_alter=True, name="lastCardIdfk")))
@ -135,6 +136,7 @@ class Fact(object):
d = {}
for f in self.model.fieldModels:
d[f.name] = (f.id, self[f.name])
self.spaceUntil = stripHTMLMedia(u" ".join([x[1] for x in d.values()]))
for card in self.cards:
qa = formatQA(None, self.modelId, d, card.splitTags(), card.cardModel)
card.question = qa['question']

View file

@ -128,11 +128,17 @@ def fmtFloat(float_value, point=1):
def stripHTML(s):
    "Remove style/script blocks and tags from S, then unescape &lt; and &gt;."
    # order matters: style and script elements are removed together with
    # their contents before the generic tag pattern runs, otherwise their
    # bodies would be left behind as plain text
    removals = (
        "(?s)<style.*?>.*?</style>",
        "(?s)<script.*?>.*?</script>",
        "<.*?>",
    )
    for pattern in removals:
        s = re.sub(pattern, "", s)
    # only these two entities are decoded; everything else is left as-is
    return s.replace("&lt;", "<").replace("&gt;", ">")
def stripHTMLMedia(s):
    "Strip HTML from S, leaving the src value of <img src=...> tags as text."
    # replace matching image tags with their space-padded src first, so the
    # filename survives the generic tag removal done by stripHTML
    withFilenames = re.sub(
        "<img src=[\"']?([^\"'>]+)[\"']? ?/?>", " \\1 ", s)
    return stripHTML(withFilenames)
def tidyHTML(html):
"Remove cruft like body tags and return just the important part."
# contents of body - no head or html tags