mirror of
https://github.com/ankitects/anki.git
synced 2025-09-21 07:22:23 -04:00
more checksum work
- convert checksums to int
- add bulk update & update on upgrade
- add indices pending performance testing

The fsums table & indices add about 2MB to a deck with 50k unique fields.
This commit is contained in:
parent
4becd8399c
commit
f5b326c753
5 changed files with 34 additions and 31 deletions
45
anki/deck.py
45
anki/deck.py
|
@ -10,7 +10,7 @@ from itertools import groupby
|
|||
from anki.lang import _, ngettext
|
||||
from anki.utils import parseTags, tidyHTML, ids2str, hexifyID, \
|
||||
canonifyTags, joinTags, addTags, deleteTags, checksum, fieldChecksum, \
|
||||
stripHTML, intTime
|
||||
stripHTML, intTime, splitFields
|
||||
|
||||
from anki.fonts import toPlatformFont
|
||||
from anki.hooks import runHook, hookEmpty, runFilter
|
||||
|
@ -508,9 +508,9 @@ due > :now and due < :now""", now=time.time())
|
|||
# [cid, fid, mid, tid, gid, tags, flds, data]
|
||||
data = [1, 1, fact.model.id, template.id, 1,
|
||||
"", fact.joinedFields(), ""]
|
||||
now = self.formatQA(fact.model, template, "", data)
|
||||
now = self._formatQA(fact.model, template, "", data)
|
||||
data[6] = "\x1f".join([""]*len(fact._fields))
|
||||
empty = self.formatQA(fact.model, template, "", data)
|
||||
empty = self._formatQA(fact.model, template, "", data)
|
||||
if now['q'] == empty['q']:
|
||||
continue
|
||||
if not template.conf['allowEmptyAns']:
|
||||
|
@ -929,12 +929,10 @@ where tid in %s""" % strids, now=time.time())
|
|||
for t in m.templates:
|
||||
templs[t.id] = t
|
||||
groups = dict(self.db.all("select id, name from groups"))
|
||||
return [self.formatQA(mods[row[2]], templs[row[3]], groups[row[4]], row)
|
||||
return [self._formatQA(mods[row[2]], templs[row[3]], groups[row[4]], row)
|
||||
for row in self._qaData(where)]
|
||||
# # and checksum
|
||||
# self._updateFieldChecksums(facts)
|
||||
|
||||
def formatQA(self, model, template, gname, data, filters=True):
|
||||
def _formatQA(self, model, template, gname, data, filters=True):
|
||||
"Returns hash of id, question, answer."
|
||||
# data is [cid, fid, mid, tid, gid, tags, flds, data]
|
||||
# unpack fields and create dict
|
||||
|
@ -975,23 +973,24 @@ where c.fid == f.id and f.mid == m.id and
|
|||
c.tid = t.id and c.gid = g.id
|
||||
%s""" % where)
|
||||
|
||||
def _updateFieldChecksums(self, facts):
|
||||
print "benchmark updatefieldchecksums"
|
||||
confs = {}
|
||||
# Field checksum bulk update
|
||||
##########################################################################
|
||||
|
||||
def updateFieldChecksums(self, fids):
|
||||
"Update all field checksums, after find&replace, etc."
|
||||
sfids = ids2str(fids)
|
||||
mods = {}
|
||||
for m in self.allModels():
|
||||
mods[m.id] = m
|
||||
r = []
|
||||
for (fid, map) in facts.items():
|
||||
for (fmid, val) in map.values():
|
||||
if fmid and fmid not in confs:
|
||||
confs[fmid] = simplejson.loads(self.db.scalar(
|
||||
"select conf from fields where id = ?",
|
||||
fmid))
|
||||
# if unique checking has been turned off, don't bother to
|
||||
# zero out old values
|
||||
if confs[fmid]['unique']:
|
||||
csum = fieldChecksum(val)
|
||||
r.append((csum, fid, fmid))
|
||||
self.db.executemany(
|
||||
"update fdata set csum=? where fid=? and fmid=?", r)
|
||||
for row in self._qaData(where="and f.id in "+sfids):
|
||||
fields = splitFields(row[6])
|
||||
model = mods[row[2]]
|
||||
for c, f in enumerate(model.fields):
|
||||
if f['uniq'] and fields[c]:
|
||||
r.append((row[1], model.id, fieldChecksum(fields[c])))
|
||||
self.db.execute("delete from fsums where fid in "+sfids)
|
||||
self.db.executemany("insert into fsums values (?,?,?)", r)
|
||||
|
||||
# Tags
|
||||
##########################################################################
|
||||
|
|
|
@ -58,10 +58,10 @@ insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
|
|||
for (ord, conf) in self._fmap.values():
|
||||
if not conf['uniq']:
|
||||
continue
|
||||
val = fieldChecksum(self._fields[ord])
|
||||
val = self._fields[ord]
|
||||
if not val:
|
||||
continue
|
||||
d.append((self.id, self.mid, val))
|
||||
d.append((self.id, self.mid, fieldChecksum(val)))
|
||||
self.deck.db.executemany("insert into fsums values (?, ?, ?)", d)
|
||||
|
||||
def cards(self):
|
||||
|
|
|
@ -187,6 +187,9 @@ create index if not exists ix_cards_fid on cards (fid);
|
|||
create index if not exists ix_revlog_cid on revlog (cid);
|
||||
-- media
|
||||
create index if not exists ix_media_csum on media (csum);
|
||||
-- unique checking
|
||||
create index if not exists ix_fsums_fid on fsums (fid);
|
||||
create index if not exists ix_fsums_csum on fsums (csum);
|
||||
""")
|
||||
|
||||
# 2.0 schema migration
|
||||
|
@ -460,7 +463,10 @@ def _rewriteModelIds(deck):
|
|||
def _postSchemaUpgrade(deck):
|
||||
"Handle the rest of the upgrade to 2.0."
|
||||
import anki.deck
|
||||
# fix up model/template ids
|
||||
_rewriteModelIds(deck)
|
||||
# update uniq cache
|
||||
deck.updateFieldChecksums(deck.db.list("select id from facts"))
|
||||
# remove old views
|
||||
for v in ("failedCards", "revCardsOld", "revCardsNew",
|
||||
"revCardsDue", "revCardsRandom", "acqCardsRandom",
|
||||
|
|
|
@ -272,10 +272,8 @@ def checksum(data):
|
|||
return md5(data).hexdigest()
|
||||
|
||||
def fieldChecksum(data):
|
||||
# 8 digit md5 hash of utf8 string, or empty string if empty value
|
||||
if not data:
|
||||
return ""
|
||||
return checksum(data.encode("utf-8"))[:8]
|
||||
# 32 bit unsigned number from first 8 digits of md5 hash
|
||||
return int(checksum(data.encode("utf-8"))[:8], 16)
|
||||
|
||||
def call(argv, wait=True, **kwargs):
|
||||
try:
|
||||
|
|
|
@ -88,7 +88,7 @@ def test_fieldChecksum():
|
|||
f['Front'] = u"new"; f['Back'] = u"new2"
|
||||
deck.addFact(f)
|
||||
assert deck.db.scalar(
|
||||
"select csum from fsums") == "22af645d"
|
||||
"select csum from fsums") == int("22af645d", 16)
|
||||
# empty field should have no checksum
|
||||
f['Front'] = u""
|
||||
f.flush()
|
||||
|
@ -98,7 +98,7 @@ def test_fieldChecksum():
|
|||
f['Front'] = u"newx"
|
||||
f.flush()
|
||||
assert deck.db.scalar(
|
||||
"select csum from fsums") == "4b0e5a4c"
|
||||
"select csum from fsums") == int("4b0e5a4c", 16)
|
||||
# turning off unique and modifying the fact should delete the sum
|
||||
f.model.fields[0]['uniq'] = False
|
||||
f.model.flush()
|
||||
|
|
Loading…
Reference in a new issue