mirror of
https://github.com/ankitects/anki.git
synced 2025-09-21 15:32:23 -04:00
more checksum work
- convert checksums to int
- add bulk update & update on upgrade
- add indices, pending performance testing

The fsums table & indices add about 2MB to a deck with 50k unique fields.
This commit is contained in:
parent
4becd8399c
commit
f5b326c753
5 changed files with 34 additions and 31 deletions
45
anki/deck.py
45
anki/deck.py
|
@ -10,7 +10,7 @@ from itertools import groupby
|
||||||
from anki.lang import _, ngettext
|
from anki.lang import _, ngettext
|
||||||
from anki.utils import parseTags, tidyHTML, ids2str, hexifyID, \
|
from anki.utils import parseTags, tidyHTML, ids2str, hexifyID, \
|
||||||
canonifyTags, joinTags, addTags, deleteTags, checksum, fieldChecksum, \
|
canonifyTags, joinTags, addTags, deleteTags, checksum, fieldChecksum, \
|
||||||
stripHTML, intTime
|
stripHTML, intTime, splitFields
|
||||||
|
|
||||||
from anki.fonts import toPlatformFont
|
from anki.fonts import toPlatformFont
|
||||||
from anki.hooks import runHook, hookEmpty, runFilter
|
from anki.hooks import runHook, hookEmpty, runFilter
|
||||||
|
@ -508,9 +508,9 @@ due > :now and due < :now""", now=time.time())
|
||||||
# [cid, fid, mid, tid, gid, tags, flds, data]
|
# [cid, fid, mid, tid, gid, tags, flds, data]
|
||||||
data = [1, 1, fact.model.id, template.id, 1,
|
data = [1, 1, fact.model.id, template.id, 1,
|
||||||
"", fact.joinedFields(), ""]
|
"", fact.joinedFields(), ""]
|
||||||
now = self.formatQA(fact.model, template, "", data)
|
now = self._formatQA(fact.model, template, "", data)
|
||||||
data[6] = "\x1f".join([""]*len(fact._fields))
|
data[6] = "\x1f".join([""]*len(fact._fields))
|
||||||
empty = self.formatQA(fact.model, template, "", data)
|
empty = self._formatQA(fact.model, template, "", data)
|
||||||
if now['q'] == empty['q']:
|
if now['q'] == empty['q']:
|
||||||
continue
|
continue
|
||||||
if not template.conf['allowEmptyAns']:
|
if not template.conf['allowEmptyAns']:
|
||||||
|
@ -929,12 +929,10 @@ where tid in %s""" % strids, now=time.time())
|
||||||
for t in m.templates:
|
for t in m.templates:
|
||||||
templs[t.id] = t
|
templs[t.id] = t
|
||||||
groups = dict(self.db.all("select id, name from groups"))
|
groups = dict(self.db.all("select id, name from groups"))
|
||||||
return [self.formatQA(mods[row[2]], templs[row[3]], groups[row[4]], row)
|
return [self._formatQA(mods[row[2]], templs[row[3]], groups[row[4]], row)
|
||||||
for row in self._qaData(where)]
|
for row in self._qaData(where)]
|
||||||
# # and checksum
|
|
||||||
# self._updateFieldChecksums(facts)
|
|
||||||
|
|
||||||
def formatQA(self, model, template, gname, data, filters=True):
|
def _formatQA(self, model, template, gname, data, filters=True):
|
||||||
"Returns hash of id, question, answer."
|
"Returns hash of id, question, answer."
|
||||||
# data is [cid, fid, mid, tid, gid, tags, flds, data]
|
# data is [cid, fid, mid, tid, gid, tags, flds, data]
|
||||||
# unpack fields and create dict
|
# unpack fields and create dict
|
||||||
|
@ -975,23 +973,24 @@ where c.fid == f.id and f.mid == m.id and
|
||||||
c.tid = t.id and c.gid = g.id
|
c.tid = t.id and c.gid = g.id
|
||||||
%s""" % where)
|
%s""" % where)
|
||||||
|
|
||||||
def _updateFieldChecksums(self, facts):
|
# Field checksum bulk update
|
||||||
print "benchmark updatefieldchecksums"
|
##########################################################################
|
||||||
confs = {}
|
|
||||||
|
def updateFieldChecksums(self, fids):
|
||||||
|
"Update all field checksums, after find&replace, etc."
|
||||||
|
sfids = ids2str(fids)
|
||||||
|
mods = {}
|
||||||
|
for m in self.allModels():
|
||||||
|
mods[m.id] = m
|
||||||
r = []
|
r = []
|
||||||
for (fid, map) in facts.items():
|
for row in self._qaData(where="and f.id in "+sfids):
|
||||||
for (fmid, val) in map.values():
|
fields = splitFields(row[6])
|
||||||
if fmid and fmid not in confs:
|
model = mods[row[2]]
|
||||||
confs[fmid] = simplejson.loads(self.db.scalar(
|
for c, f in enumerate(model.fields):
|
||||||
"select conf from fields where id = ?",
|
if f['uniq'] and fields[c]:
|
||||||
fmid))
|
r.append((row[1], model.id, fieldChecksum(fields[c])))
|
||||||
# if unique checking has been turned off, don't bother to
|
self.db.execute("delete from fsums where fid in "+sfids)
|
||||||
# zero out old values
|
self.db.executemany("insert into fsums values (?,?,?)", r)
|
||||||
if confs[fmid]['unique']:
|
|
||||||
csum = fieldChecksum(val)
|
|
||||||
r.append((csum, fid, fmid))
|
|
||||||
self.db.executemany(
|
|
||||||
"update fdata set csum=? where fid=? and fmid=?", r)
|
|
||||||
|
|
||||||
# Tags
|
# Tags
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
|
@ -58,10 +58,10 @@ insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||||
for (ord, conf) in self._fmap.values():
|
for (ord, conf) in self._fmap.values():
|
||||||
if not conf['uniq']:
|
if not conf['uniq']:
|
||||||
continue
|
continue
|
||||||
val = fieldChecksum(self._fields[ord])
|
val = self._fields[ord]
|
||||||
if not val:
|
if not val:
|
||||||
continue
|
continue
|
||||||
d.append((self.id, self.mid, val))
|
d.append((self.id, self.mid, fieldChecksum(val)))
|
||||||
self.deck.db.executemany("insert into fsums values (?, ?, ?)", d)
|
self.deck.db.executemany("insert into fsums values (?, ?, ?)", d)
|
||||||
|
|
||||||
def cards(self):
|
def cards(self):
|
||||||
|
|
|
@ -187,6 +187,9 @@ create index if not exists ix_cards_fid on cards (fid);
|
||||||
create index if not exists ix_revlog_cid on revlog (cid);
|
create index if not exists ix_revlog_cid on revlog (cid);
|
||||||
-- media
|
-- media
|
||||||
create index if not exists ix_media_csum on media (csum);
|
create index if not exists ix_media_csum on media (csum);
|
||||||
|
-- unique checking
|
||||||
|
create index if not exists ix_fsums_fid on fsums (fid);
|
||||||
|
create index if not exists ix_fsums_csum on fsums (csum);
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# 2.0 schema migration
|
# 2.0 schema migration
|
||||||
|
@ -460,7 +463,10 @@ def _rewriteModelIds(deck):
|
||||||
def _postSchemaUpgrade(deck):
|
def _postSchemaUpgrade(deck):
|
||||||
"Handle the rest of the upgrade to 2.0."
|
"Handle the rest of the upgrade to 2.0."
|
||||||
import anki.deck
|
import anki.deck
|
||||||
|
# fix up model/template ids
|
||||||
_rewriteModelIds(deck)
|
_rewriteModelIds(deck)
|
||||||
|
# update uniq cache
|
||||||
|
deck.updateFieldChecksums(deck.db.list("select id from facts"))
|
||||||
# remove old views
|
# remove old views
|
||||||
for v in ("failedCards", "revCardsOld", "revCardsNew",
|
for v in ("failedCards", "revCardsOld", "revCardsNew",
|
||||||
"revCardsDue", "revCardsRandom", "acqCardsRandom",
|
"revCardsDue", "revCardsRandom", "acqCardsRandom",
|
||||||
|
|
|
@ -272,10 +272,8 @@ def checksum(data):
|
||||||
return md5(data).hexdigest()
|
return md5(data).hexdigest()
|
||||||
|
|
||||||
def fieldChecksum(data):
|
def fieldChecksum(data):
|
||||||
# 8 digit md5 hash of utf8 string, or empty string if empty value
|
# 32 bit unsigned number from first 8 digits of md5 hash
|
||||||
if not data:
|
return int(checksum(data.encode("utf-8"))[:8], 16)
|
||||||
return ""
|
|
||||||
return checksum(data.encode("utf-8"))[:8]
|
|
||||||
|
|
||||||
def call(argv, wait=True, **kwargs):
|
def call(argv, wait=True, **kwargs):
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -88,7 +88,7 @@ def test_fieldChecksum():
|
||||||
f['Front'] = u"new"; f['Back'] = u"new2"
|
f['Front'] = u"new"; f['Back'] = u"new2"
|
||||||
deck.addFact(f)
|
deck.addFact(f)
|
||||||
assert deck.db.scalar(
|
assert deck.db.scalar(
|
||||||
"select csum from fsums") == "22af645d"
|
"select csum from fsums") == int("22af645d", 16)
|
||||||
# empty field should have no checksum
|
# empty field should have no checksum
|
||||||
f['Front'] = u""
|
f['Front'] = u""
|
||||||
f.flush()
|
f.flush()
|
||||||
|
@ -98,7 +98,7 @@ def test_fieldChecksum():
|
||||||
f['Front'] = u"newx"
|
f['Front'] = u"newx"
|
||||||
f.flush()
|
f.flush()
|
||||||
assert deck.db.scalar(
|
assert deck.db.scalar(
|
||||||
"select csum from fsums") == "4b0e5a4c"
|
"select csum from fsums") == int("4b0e5a4c", 16)
|
||||||
# turning off unique and modifying the fact should delete the sum
|
# turning off unique and modifying the fact should delete the sum
|
||||||
f.model.fields[0]['uniq'] = False
|
f.model.fields[0]['uniq'] = False
|
||||||
f.model.flush()
|
f.model.flush()
|
||||||
|
|
Loading…
Reference in a new issue