add a field cache for searching without html

This commit is contained in:
Damien Elmes 2010-10-27 19:00:49 +09:00
parent 9f9841cb22
commit 8ce0ff5b8b
3 changed files with 31 additions and 4 deletions

View file

@ -56,7 +56,7 @@ SEARCH_TAG = 0
SEARCH_TYPE = 1 SEARCH_TYPE = 1
SEARCH_PHRASE = 2 SEARCH_PHRASE = 2
SEARCH_FID = 3 SEARCH_FID = 3
DECK_VERSION = 47 DECK_VERSION = 48
deckVarsTable = Table( deckVarsTable = Table(
'deckVars', metadata, 'deckVars', metadata,
@ -1829,13 +1829,28 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid]) pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid])
for (cid, cmid, fid, mid) in ids] for (cid, cmid, fid, mid) in ids]
if pend: if pend:
# update q/a
self.s.execute(""" self.s.execute("""
update cards set update cards set
question = :question, answer = :answer question = :question, answer = :answer
%s %s
where id = :id""" % mod, pend) where id = :id""" % mod, pend)
# update fields cache
self.updateFieldCache(facts.keys())
self.flushMod() self.flushMod()
def updateFieldCache(self, fids):
    """Rebuild the HTML-stripped field cache for the facts in FIDS.

    For every fact id, the values of its rows in the fields table are
    joined with a single space, run through stripHTMLMedia, and the
    result is stored in that fact's spaceUntil column.
    """
    query = ("select factId, group_concat(value, ' ') from fields "
             "where factId in %s group by factId") % ids2str(fids)
    rows = self.s.all(query)
    # function-scope import of the stripping helper
    from anki.utils import stripHTMLMedia
    # one bind-parameter dict per fact: id -> stripped, concatenated text
    updates = [{'id': row[0], 'v': stripHTMLMedia(row[1])} for row in rows]
    self.s.statements(
        "update facts set spaceUntil=:v where id=:id", updates)
def rebuildCardOrdinals(self, ids): def rebuildCardOrdinals(self, ids):
"Update all card models in IDS. Caller must update model modtime." "Update all card models in IDS. Caller must update model modtime."
self.s.flush() self.s.flush()
@ -3604,6 +3619,10 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
deck.s.execute("analyze") deck.s.execute("analyze")
deck.version = 47 deck.version = 47
deck.s.commit() deck.s.commit()
if deck.version < 48:
deck.updateFieldCache(deck.s.column0("select id from facts"))
deck.version = 48
deck.s.commit()
# executing a pragma here is very slow on large decks, so we store # executing a pragma here is very slow on large decks, so we store
# our own record # our own record
if not deck.getInt("pageSize") == 4096: if not deck.getInt("pageSize") == 4096:

View file

@ -12,7 +12,7 @@ import time
from anki.db import * from anki.db import *
from anki.errors import * from anki.errors import *
from anki.models import Model, FieldModel, fieldModelsTable, formatQA from anki.models import Model, FieldModel, fieldModelsTable, formatQA
from anki.utils import genID from anki.utils import genID, stripHTMLMedia
from anki.hooks import runHook from anki.hooks import runHook
# Fields in a fact # Fields in a fact
@ -56,8 +56,9 @@ factsTable = Table(
Column('created', Float, nullable=False, default=time.time), Column('created', Float, nullable=False, default=time.time),
Column('modified', Float, nullable=False, default=time.time), Column('modified', Float, nullable=False, default=time.time),
Column('tags', UnicodeText, nullable=False, default=u""), Column('tags', UnicodeText, nullable=False, default=u""),
# the following two fields are obsolete and now stored in cards table # spaceUntil is reused as a html-stripped cache of the fields
Column('spaceUntil', Float, nullable=False, default=0), Column('spaceUntil', UnicodeText, nullable=False, default=u""),
# obsolete
Column('lastCardId', Integer, ForeignKey( Column('lastCardId', Integer, ForeignKey(
"cards.id", use_alter=True, name="lastCardIdfk"))) "cards.id", use_alter=True, name="lastCardIdfk")))
@ -135,6 +136,7 @@ class Fact(object):
d = {} d = {}
for f in self.model.fieldModels: for f in self.model.fieldModels:
d[f.name] = (f.id, self[f.name]) d[f.name] = (f.id, self[f.name])
self.spaceUntil = stripHTMLMedia(u" ".join([x[1] for x in d.values()]))
for card in self.cards: for card in self.cards:
qa = formatQA(None, self.modelId, d, card.splitTags(), card.cardModel) qa = formatQA(None, self.modelId, d, card.splitTags(), card.cardModel)
card.question = qa['question'] card.question = qa['question']

View file

@ -128,11 +128,17 @@ def fmtFloat(float_value, point=1):
def stripHTML(s):
    """Return S with HTML markup removed.

    <style> and <script> elements are dropped together with their
    contents, every remaining tag is deleted, and the &lt; / &gt;
    entities are turned back into literal angle brackets. No other
    entities are decoded.
    """
    # (?s) lets "." cross newlines; (?i) also catches upper-case tag names
    # such as <STYLE>, whose contents would otherwise be left in the text
    s = re.sub("(?si)<style.*?>.*?</style>", "", s)
    s = re.sub("(?si)<script.*?>.*?</script>", "", s)
    # (?s) so that a tag broken across several lines is still removed
    s = re.sub("(?s)<.*?>", "", s)
    # decode only after tag removal so escaped brackets survive as text
    s = s.replace("&lt;", "<")
    s = s.replace("&gt;", ">")
    return s
def stripHTMLMedia(s):
    """Strip HTML from S but keep media filenames.

    Each <img> tag is replaced by the value of its src attribute,
    padded with a space on either side, before the remaining markup is
    removed with stripHTML.
    """
    # Match src wherever it appears among the tag's attributes, in any
    # letter case, quoted or unquoted. The leading \s keeps attributes
    # that merely end in "src" (e.g. data-src) from being picked up.
    s = re.sub(
        r"""(?i)<img\b[^>]*?\ssrc\s*=\s*["']?([^"'>]+)["']?[^>]*>""",
        " \\1 ", s)
    return stripHTML(s)
def tidyHTML(html): def tidyHTML(html):
"Remove cruft like body tags and return just the important part." "Remove cruft like body tags and return just the important part."
# contents of body - no head or html tags # contents of body - no head or html tags