change field storage format, improve upgrade speed

Since Anki first moved to an SQL backend, it has stored fields in a fields
table, with one field per row. This is a natural layout in a relational
database, and it had some nice properties. It meant we could retrieve an
individual field of a fact, which we used for limiting searches to a
particular field, for sorting, and for determining if a field was unique, by
adding an index on the field value.

The index was very expensive, so as part of the early work towards 2.0 I added
a checksum field instead, and added an index to that. This was a lot cheaper
than storing the entire value twice for the purpose of fast searches, but it
only partly solved the problem. We still needed an index on factId so that we
could retrieve a given fact's fields quickly. For simple models this was
fairly cheap, but as the number of fields grows the table grows very big: with
25k facts of 30 fields each, the fields table has 750k entries. This makes the
factId index and checksum index really expensive - with the q/a cache removed,
they make up about 30% of the deck in such a situation.

Equally problematic was sorting on those fields. Short of adding another
expensive index, a sort involves a table scan of the entire table.

We solve these problems by moving all fields into the facts table. For this to
work, we need to address some issues:

Sorting: we'll add an option to the model to specify the sort field. When
facts are modified, that field is written to a separate sort column. It can be
HTML stripped, and possibly truncated to a maximum number of letters. This
means that switching sort to a different field involves an expensive rewrite
of the sort column, but people tend to leave their sort field set to the same
value, and we don't need to clear the field if the user switches temporarily
to a non-field sort like due order. It also has the nice properties of
allowing different models to be sorted on different columns at the same time,
and of making it impossible for models to be hidden because the user has
sorted on a field which doesn't appear in some models.

Searching for words with embedded HTML: 1.2 introduced an HTML-stripped cache
of the fields content, which both sped up searches (since we didn't have to
search the possibly large fields table), and meant we could find "bob" in
"b<b>ob</b>" quickly. The ability to quickly search for words peppered with
HTML was nice, but it meant doubling the cost of storing text in many cases,
and meant that after any edit more data had to be written to the DB. Instead, we'll
do it on the fly. On this i7 computer, stripping HTML from all fields takes
1-2.6 seconds on 25-50k decks. We could possibly skip the stripping for people
who don't require it - the number of people who bold parts of words is
actually pretty small.

Duplicate detection: one option would be to fetch all fields when the add
cards dialog or editor is opened. But this will be expensive on mobile
devices. Instead, we'll create a separate table of (fid, csum), with an index
on both columns. When we edit a fact, we delete all the existing checksums for
that fact, and add checksums for any fields that must be checked as unique. We
could optionally skip the index on csum - some benchmarking is required.

As for the new table layout, creating separate columns for each field won't
scale. Instead, we store the fields in a single column, separated by the ASCII
unit separator character (0x1f). We split on that character when extracting
from the database, and join on it when writing to the DB.

Searching on a particular field in the browser will be accomplished by finding
all facts that match, and then unpacking to see if the relevant field matched.

Tags have been moved back to a separate column. Now that fields are on the
facts table, there is no need to pack them in as a field simply to avoid
another table hit.
This commit is contained in:
Damien Elmes 2011-03-10 06:28:25 +09:00
parent 59754eacb2
commit 1078285f0f
10 changed files with 218 additions and 305 deletions

View file

@ -112,10 +112,8 @@ streak=?, lapses=?, grade=?, cycles=? where id = ?""",
def _getQA(self, reload=False):
# this is a hack at the moment
if not self._qa or reload:
self._qa = self.deck.formatQA(
self.id,
self.deck._cacheFacts([self.fid])[self.fid],
self.deck._cacheMeta("and c.id = %d" % self.id)[2][self.id])
self._qa = self.deck.updateCache(
[self.id], "card")[0]
return self._qa
def fact(self):

View file

@ -148,6 +148,11 @@ qconf=?, conf=?, data=?""",
# unsorted
##########################################################################
def nextID(self, type):
id = self.conf.get(type, 1)
self.conf[type] = id+1
return id
def reset(self):
self.sched.reset()
# recache css
@ -500,14 +505,12 @@ due > :now and due < :now""", now=time.time())
ok = []
for template in fact.model.templates:
if template.active or not checkActive:
# [cid, fid, qfmt, afmt, tags, model, template, group]
meta = [None, template.qfmt, template.afmt,
"", "", "", ""]
fields = fact.fieldsWithIds()
now = self.formatQA(None, fields, meta, False)
for k in fields.keys():
fields[k] = (fields[k][0], "")
empty = self.formatQA(None, fields, meta, False)
# [cid, fid, mid, tid, gid, tags, flds, data]
data = [1, 1, fact.model.id, template.id, 1,
"", fact.joinedFields(), ""]
now = self.formatQA(fact.model, template, "", data)
data[6] = "\x1f".join([""]*len(fact._fields))
empty = self.formatQA(fact.model, template, "", data)
if now['q'] == empty['q']:
continue
if not template.conf['allowEmptyAns']:
@ -557,7 +560,7 @@ where fid = :fid and tid = :cmid""",
return
strids = ids2str(ids)
self.db.execute("delete from facts where id in %s" % strids)
self.db.execute("delete from fdata where fid in %s" % strids)
#self.db.execute("delete from fdata where fid in %s" % strids)
def _deleteDanglingFacts(self):
"Delete any facts without cards. Don't call this directly."
@ -659,7 +662,6 @@ select id from cards where fid in (select id from facts where mid = ?)""",
# then the model
self.db.execute("delete from models where id = ?", mid)
self.db.execute("delete from templates where mid = ?", mid)
self.db.execute("delete from fields where mid = ?", mid)
# GUI should ensure last model is not deleted
if self.conf['currentModelId'] == mid:
self.conf['currentModelId'] = self.db.scalar(
@ -904,7 +906,7 @@ where tid in %s""" % strids, now=time.time())
# Caches: q/a, facts.cache and fdata.csum
##########################################################################
def updateCache(self, ids, type="card"):
def updateCache(self, ids=None, type="card"):
"Update cache after facts or models changed."
# gather metadata
if type == "card":
@ -913,87 +915,62 @@ where tid in %s""" % strids, now=time.time())
where = "and f.id in " + ids2str(ids)
elif type == "model":
where = "and m.id in " + ids2str(ids)
(cids, fids, meta) = self._cacheMeta(where)
if not cids:
return
# and fact info
facts = self._cacheFacts(fids)
# generate q/a
pend = [self.formatQA(cids[n], facts[fids[n]], meta[cids[n]])
for n in range(len(cids))]
for p in pend:
self.media.registerText(p['q'])
self.media.registerText(p['a'])
# fact value cache
self._updateFieldCache(facts)
# and checksum
self._updateFieldChecksums(facts)
elif type == "all":
where = ""
else:
raise Exception()
mods = {}
templs = {}
for m in self.allModels():
mods[m.id] = m
for t in m.templates:
templs[t.id] = t
groups = dict(self.db.all("select id, name from groups"))
return [self.formatQA(mods[row[2]], templs[row[3]], groups[row[4]], row)
for row in self._qaData(where)]
# # and checksum
# self._updateFieldChecksums(facts)
def formatQA(self, cardId, fact, meta, filters=True):
def formatQA(self, model, template, gname, data, filters=True):
"Returns hash of id, question, answer."
d = {'id': cardId}
# data is [cid, fid, mid, tid, gid, tags, flds, data]
# unpack fields and create dict
flist = data[6].split("\x1f")
fields = {}
tags = None
for (k, v) in fact.items():
if k == None:
tags = v[1]
continue
fields["text:"+k] = stripHTML(v[1])
if v[1]:
fields[k] = '<span class="fm%s">%s</span>' % (
hexifyID(v[0]), v[1])
for (name, (idx, conf)) in model.fieldMap().items():
fields[name] = flist[idx]
fields["text:"+name] = stripHTML(fields[name])
if fields[name]:
fields["text:"+name] = stripHTML(fields[name])
fields[name] = '<span class="fm%s-%s">%s</span>' % (
hexifyID(data[2]), hexifyID(idx), fields[name])
else:
fields[k] = u""
fields['Tags'] = tags
fields['Model'] = meta[3]
fields['Template'] = meta[4]
fields['Group'] = meta[5]
fields["text:"+name] = ""
fields[name] = ""
fields['Tags'] = data[5]
fields['Model'] = model.name
fields['Template'] = template.name
fields['Group'] = gname
# render q & a
for (type, format) in (("q", meta[1]), ("a", meta[2])):
if filters:
fields = runFilter("formatQA.pre", fields, meta, self)
d = dict(id=data[0])
for (type, format) in (("q", template.qfmt), ("a", template.afmt)):
# if filters:
# fields = runFilter("formatQA.pre", fields, , self)
html = anki.template.render(format, fields)
if filters:
d[type] = runFilter("formatQA.post", html, fields, meta, self)
# if filters:
# d[type] = runFilter("formatQA.post", html, fields, meta, self)
self.media.registerText(html)
d[type] = html
return d
def _cacheMeta(self, where=""):
"Return cids, fids, and cid -> data hash."
# data is [fid, qfmt, afmt, model, template, group]
meta = {}
cids = []
fids = []
for r in self.db.execute("""
select c.id, f.id, t.qfmt, t.afmt, m.name, t.name, g.name
from cards c, facts f, models m, templates t, groups g where
c.fid == f.id and f.mid == m.id and
def _qaData(self, where=""):
"Return [cid, fid, mid, tid, gid, tags, flds, data] db query"
return self.db.execute("""
select c.id, f.id, m.id, t.id, g.id, f.tags, f.flds, f.data
from cards c, facts f, models m, templates t, groups g
where c.fid == f.id and f.mid == m.id and
c.tid = t.id and c.gid = g.id
%s""" % where):
meta[r[0]] = r[1:]
cids.append(r[0])
fids.append(r[1])
return (cids, fids, meta)
def _cacheFacts(self, ids):
"Return a hash of fid -> (name -> (id, val))."
facts = {}
for id, fields in groupby(self.db.all("""
select fdata.fid, fields.name, fields.id, fdata.val
from fdata left outer join fields on fdata.fmid = fields.id
where fdata.fid in %s order by fdata.fid""" % ids2str(ids)), itemgetter(0)):
facts[id] = dict([(f[1], f[2:]) for f in fields])
return facts
def _updateFieldCache(self, facts):
"Add stripped HTML cache for searching."
r = []
from anki.utils import stripHTMLMedia
[r.append((stripHTMLMedia(
" ".join([x[1] for x in map.values()])), id))
for (id, map) in facts.items()]
self.db.executemany(
"update facts set cache=? where id=?", r)
%s""" % where)
def _updateFieldChecksums(self, facts):
print "benchmark updatefieldchecksums"
@ -1055,26 +1032,23 @@ insert or ignore into tags (mod, name) values (%d, :t)""" % intTime(),
self.registerTags(newTags)
# find facts missing the tags
if add:
l = "val not "
l = "tags not "
fn = addTags
else:
l = "val "
l = "tags "
fn = deleteTags
lim = " or ".join(
[l+"like :_%d" % c for c, t in enumerate(newTags)])
res = self.db.all(
"select fid, val from fdata where ord = -1 and " + lim,
"select id, tags from facts where " + lim,
**dict([("_%d" % x, '%% %s %%' % y) for x, y in enumerate(newTags)]))
# update tags
fids = []
def fix(row):
fids.append(row[0])
return {'id': row[0], 't': fn(tags, row[1])}
return {'id': row[0], 't': fn(tags, row[1]), 'n':intTime()}
self.db.executemany("""
update fdata set val = :t
where fid = :id""", [fix(row) for row in res])
self.db.execute("update facts set mod = ? where id in " +
ids2str(fids), intTime())
update facts set tags = :t, mod = :n where id = :id""", [fix(row) for row in res])
# update q/a cache
self.updateCache(fids, type="fact")
self.finishProgress()

View file

@ -24,40 +24,46 @@ class Fact(object):
self.tags = ""
self.cache = ""
self._fields = [""] * len(self.model.fields)
self.data = ""
self._fmap = self.model.fieldMap()
def load(self):
(self.mid,
self.crt,
self.mod) = self.deck.db.first("""
select mid, crt, mod from facts where id = ?""", self.id)
self._fields = self.deck.db.list("""
select val from fdata where fid = ? and fmid order by ord""", self.id)
self.tags = self.deck.db.scalar("""
select val from fdata where fid = ? and ord = -1""", self.id)
self.mod,
self.tags,
self._fields,
self.data) = self.deck.db.first("""
select mid, crt, mod, tags, flds, data from facts where id = ?""", self.id)
self._fields = self._field.split("\x1f")
self.model = self.deck.getModel(self.mid)
def flush(self, cache=True):
self.mod = intTime()
# facts table
self.cache = stripHTMLMedia(u" ".join(self._fields))
sfld = self._fields[self.model.sortField()]
res = self.deck.db.execute("""
insert or replace into facts values (?, ?, ?, ?, ?)""",
self.id, self.mid, self.crt,
self.mod, self.cache)
insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
self.id, self.mid, self.crt,
self.mod, self.tags, self.joinedFields(),
sfld, self.data)
self.id = res.lastrowid
# fdata table
self.deck.db.execute("delete from fdata where fid = ?", self.id)
d = []
for (fmid, ord, conf) in self._fmap.values():
val = self._fields[ord]
d.append(dict(fid=self.id, fmid=fmid, ord=ord,
val=val))
d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags))
self.deck.db.executemany("""
insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
# media and caches
self.deck.updateCache([self.id], "fact")
def joinedFields(self):
return "\x1f".join(self._fields)
# # fdata table
# self.deck.db.execute("delete from fdata where fid = ?", self.id)
# d = []
# for (fmid, ord, conf) in self._fmap.values():
# val = self._fields[ord]
# d.append(dict(fid=self.id, fmid=fmid, ord=ord,
# val=val))
# d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags))
# self.deck.db.executemany("""
# insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
# # media and caches
# self.deck.updateCache([self.id], "fact")
def cards(self):
return [self.deck.getCard(id) for id in self.deck.db.list(
@ -73,12 +79,12 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
return self._fields
def items(self):
return [(k, self._fields[v])
return [(k, self._fields[v[0]])
for (k, v) in self._fmap.items()]
def _fieldOrd(self, key):
try:
return self._fmap[key][1]
return self._fmap[key][0]
except:
raise KeyError(key)
@ -88,10 +94,6 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
def __setitem__(self, key, value):
self._fields[self._fieldOrd(key)] = value
def fieldsWithIds(self):
return dict(
[(k, (v[0], self[k])) for (k,v) in self._fmap.items()])
# Tags
##################################################
@ -105,12 +107,11 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
##################################################
def fieldUnique(self, name):
(fmid, ord, conf) = self._fmap[name]
if not conf['unique']:
(ord, conf) = self._fmap[name]
if not conf['uniq']:
return True
val = self[name]
csum = fieldChecksum(val)
print "in check, ", self.id
if self.id:
lim = "and fid != :fid"
else:
@ -120,18 +121,18 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
c=csum, v=val, fid=self.id)
def fieldComplete(self, name, text=None):
(fmid, ord, conf) = self._fmap[name]
if not conf['required']:
(ord, conf) = self._fmap[name]
if not conf['req']:
return True
return self[name]
def problems(self):
d = []
for k in self._fmap.keys():
for (k, (ord, conf)) in self._fmap.items():
if not self.fieldUnique(k):
d.append("unique")
d.append((ord, "unique"))
elif not self.fieldComplete(k):
d.append("required")
d.append((ord, "required"))
else:
d.append(None)
return d
d.append((ord, None))
return [x[1] for x in sorted(d)]

View file

@ -400,8 +400,7 @@ def _findCards(deck, query):
tquery += "select id from facts except "
if token == "none":
tquery += """
select id from cards where fid in (select fid from fdata where ord = -1 and
val = ''"""
select id from cards where fid in (select id from facts where tags = '')"""
else:
token = token.replace("*", "%")
if not token.startswith("%"):
@ -410,7 +409,7 @@ val = ''"""
token += " %"
args["_tag_%d" % c] = token
tquery += """
select fid from fdata where ord = -1 and val like :_tag_%d""" % c
select id from facts where tags like :_tag_%d""" % c
elif type == SEARCH_TYPE:
if qquery:
if isNeg:
@ -549,7 +548,7 @@ select id from cards where answer like :_ff_%d escape '\\'""" % c
token = token.replace("*", "%")
args["_ff_%d" % c] = "%"+token+"%"
fquery += """
select id from facts where cache like :_ff_%d escape '\\'""" % c
select id from facts where flds like :_ff_%d escape '\\'""" % c
return (tquery, fquery, qquery, fidquery, cmquery, sfquery,
qaquery, showdistinct, filters, args)

View file

@ -177,11 +177,7 @@ If a file with the same name exists, return a unique name."""
return unicodedata.normalize('NFD', s)
return s
# generate q/a and look through all references
(cids, fids, meta) = self.deck._cacheMeta()
facts = self.deck._cacheFacts(fids)
pend = [self.deck.formatQA(cids[n], facts[fids[n]], meta[cids[n]])
for n in range(len(cids))]
for p in pend:
for p in self.deck.updateCache(type="all"):
for type in ("q", "a"):
for f in self.mediaFiles(p[type]):
normrefs[norm(f)] = True

View file

@ -2,12 +2,6 @@
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
"""\
Models load their templates and fields when they are loaded. If you update a
template or field, you should call model.flush(), rather than trying to save
the subobject directly.
"""
import simplejson
from anki.utils import intTime
from anki.lang import _
@ -36,20 +30,21 @@ class Model(object):
def load(self):
(self.mod,
self.name,
self.fields,
self.conf) = self.deck.db.first("""
select mod, name, conf from models where id = ?""", self.id)
select mod, name, flds, conf from models where id = ?""", self.id)
self.fields = simplejson.loads(self.fields)
self.conf = simplejson.loads(self.conf)
self.loadFields()
self.loadTemplates()
def flush(self):
self.mod = intTime()
ret = self.deck.db.execute("""
insert or replace into models values (?, ?, ?, ?)""",
self.id, self.mod, self.name,
simplejson.dumps(self.conf))
insert or replace into models values (?, ?, ?, ?, ?)""",
self.id, self.mod, self.name,
simplejson.dumps(self.fields),
simplejson.dumps(self.conf))
self.id = ret.lastrowid
[f._flush() for f in self.fields]
[t._flush() for t in self.templates]
def updateCache(self):
@ -64,20 +59,19 @@ insert or replace into models values (?, ?, ?, ?)""",
# Fields
##################################################
def loadFields(self):
sql = "select * from fields where mid = ? order by ord"
self.fields = [Field(self.deck, data)
for data in self.deck.db.all(sql, self.id)]
def newField(self):
return defaultFieldConf.copy()
def addField(self, field):
self.deck.modSchema()
field.mid = self._getID()
field.ord = len(self.fields)
self.fields.append(field)
def fieldMap(self):
"Mapping of field name -> (fmid, ord)."
return dict([(f.name, (f.id, f.ord, f.conf)) for f in self.fields])
"Mapping of field name -> (ord, conf)."
return dict([(f['name'], (c, f)) for c, f in enumerate(self.fields)])
def sortField(self):
return 0
# Templates
##################################################
@ -101,65 +95,33 @@ insert or replace into models values (?, ?, ?, ?)""",
new = Model(self.deck, self.id)
new.id = None
new.name += _(" copy")
new.fields = [f.copy() for f in self.fields]
# get new id
f = new.fields; new.fields = []
t = new.templates; new.templates = []
new.flush()
# then put back
new.fields = f
new.templates = t
for f in new.fields:
f.id = None
f.mid = new.id
f._flush()
for t in new.templates:
t.id = None
t.mid = new.id
t._flush()
return new
# Field model object
# Field object
##########################################################################
defaultFieldConf = {
'rtl': False, # features
'required': False,
'unique': False,
'name': "",
'rtl': False,
'req': False,
'uniq': False,
'font': "Arial",
'quizSize': 20,
'editSize': 20,
'quizColour': "#fff",
'qsize': 20,
'esize': 20,
'qcol': "#fff",
'pre': True,
}
class Field(object):
def __init__(self, deck, data=None):
self.deck = deck
if data:
self.initFromData(data)
else:
self.id = None
self.numeric = 0
self.conf = defaultFieldConf.copy()
def initFromData(self, data):
(self.id,
self.mid,
self.ord,
self.name,
self.numeric,
self.conf) = data
self.conf = simplejson.loads(self.conf)
def _flush(self):
ret = self.deck.db.execute("""
insert or replace into fields values (?, ?, ?, ?, ?, ?)""",
self.id, self.mid, self.ord,
self.name, self.numeric,
simplejson.dumps(self.conf))
self.id = ret.lastrowid
# Template object
##########################################################################

View file

@ -2,7 +2,7 @@
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
from anki.models import Model, Template, Field
from anki.models import Model, Template
from anki.lang import _
models = []
@ -13,13 +13,13 @@ models = []
def BasicModel(deck):
m = Model(deck)
m.name = _("Basic")
fm = Field(deck)
fm.name = _("Front")
fm.conf['required'] = True
fm.conf['unique'] = True
fm = m.newField()
fm['name'] = _("Front")
fm['req'] = True
fm['uniq'] = True
m.addField(fm)
fm = Field(deck)
fm.name = _("Back")
fm = m.newField()
fm['name'] = _("Back")
m.addField(fm)
t = Template(deck)
t.name = _("Forward")

View file

@ -87,22 +87,17 @@ create table if not exists facts (
mid integer not null,
crt integer not null,
mod integer not null,
cache text not null
tags text not null,
flds text not null,
sfld text not null,
data text not null
);
create table if not exists models (
id integer primary key,
mod integer not null,
name text not null,
conf text not null
);
create table if not exists fields (
id integer primary key,
mid integer not null,
ord integer not null,
name text not null,
numeric integer not null,
flds text not null,
conf text not null
);
@ -117,14 +112,6 @@ create table if not exists templates (
conf text not null
);
create table if not exists fdata (
fid integer not null,
fmid integer not null,
ord integer not null,
val text not null,
csum text not null
);
create table if not exists gconf (
id integer primary key,
mod integer not null,
@ -190,9 +177,6 @@ create index if not exists ix_cards_mod on cards (mod);
create index if not exists ix_facts_mod on facts (mod);
-- card spacing, etc
create index if not exists ix_cards_fid on cards (fid);
-- fact data
create index if not exists ix_fdata_fid on fdata (fid);
create index if not exists ix_fdata_csum on fdata (csum);
-- revlog by card
create index if not exists ix_revlog_cid on revlog (cid);
-- media
@ -204,10 +188,16 @@ create index if not exists ix_media_csum on media (csum);
# we don't have access to the progress handler at this point, so the GUI code
# will need to set up a progress handling window before opening a deck.
def _moveTable(db, table, insExtra=""):
def _moveTable(db, table, cards=False):
if cards:
insExtra = " order by created"
else:
insExtra = ""
sql = db.scalar(
"select sql from sqlite_master where name = '%s'" % table)
sql = sql.replace("TABLE "+table, "temporary table %s2" % table)
if cards:
sql = sql.replace("PRIMARY KEY (id),", "")
db.execute(sql)
db.execute("insert into %s2 select * from %s%s" % (table, table, insExtra))
db.execute("drop table "+table)
@ -244,7 +234,7 @@ def _upgradeSchema(db):
# cards
###########
# move into temp table
_moveTable(db, "cards", " order by created")
_moveTable(db, "cards", True)
# use the new order to rewrite card ids
map = dict(db.all("select id, rowid from cards2"))
_insertWithIdChange(db, map, 0, "reviewHistory", 12)
@ -274,26 +264,36 @@ when trim(tags) == "" then ""
else " " || replace(replace(trim(tags), ",", " "), " ", " ") || " "
end)
""")
# we store them as fields now
db.execute("insert into fields select null, id, 0, -1, tags from facts")
# put facts in a temporary table, sorted by created
db.execute("""
create table facts2
(id, modelId, created, modified, cache)""")
db.execute("""
insert into facts2 select id, modelId, created, modified, spaceUntil
# pull facts into memory, so we can merge them with fields efficiently
facts = db.all("""
select id, modelId, cast(created as int), cast(modified as int), tags
from facts order by created""")
# use the new order to rewrite fact ids
map = dict(db.all("select id, rowid from facts2"))
_insertWithIdChange(db, map, 1, "fields", 5)
# build field hash
fields = {}
for (fid, ord, val) in db.execute(
"select factId, ordinal, value from fields order by factId, ordinal"):
if fid not in fields:
fields[fid] = []
fields[fid].append((ord, val))
# build insert data and transform ids, and minimize qt's
# bold/italics/underline cruft.
map = {}
data = []
from anki.utils import minimizeHTML
for c, row in enumerate(facts):
oldid = row[0]
map[oldid] = c+1
row = list(row)
row[0] = c+1
row.append(minimizeHTML("\x1f".join([x[1] for x in sorted(fields[oldid])])))
data.append(row)
# use the new order to rewrite fact ids in cards table
_insertWithIdChange(db, map, 1, "cards", 18)
# and put the facts into the new table
db.execute("drop table facts")
_addSchema(db, False)
db.execute("""
insert or ignore into facts select rowid, modelId,
cast(created as int), cast(modified as int), cache from facts2""")
db.execute("drop table facts2")
db.executemany("insert into facts values (?,?,?,?,?,?,'','')", data)
db.execute("drop table fields")
# media
###########
@ -303,19 +303,12 @@ insert or ignore into media select filename, cast(created as int),
originalPath from media2""")
db.execute("drop table media2")
# fields -> fdata
###########
db.execute("""
insert into fdata select factId, fieldModelId, ordinal, value, ''
from fields order by factId, ordinal""")
db.execute("drop table fields")
# models
###########
_moveTable(db, "models")
db.execute("""
insert into models select id, cast(modified as int),
name, "{}" from models2""")
name, "{}", "{}" from models2""")
db.execute("drop table models2")
# reviewHistory -> revlog
@ -333,8 +326,8 @@ cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int),
# longer migrations
###########
_migrateDeckTbl(db)
_migrateFieldsTbl(db)
_migrateTemplatesTbl(db)
mods = _migrateFieldsTbl(db)
_migrateTemplatesTbl(db, mods)
_updateIndices(db)
return ver
@ -385,32 +378,36 @@ utcOffset, "", "", "" from decks""", t=intTime())
def _migrateFieldsTbl(db):
import anki.models
db.execute("""
insert into fields select id, modelId, ordinal, name, numeric, ''
from fieldModels""")
dconf = anki.models.defaultFieldConf
mods = {}
for row in db.all("""
select id, features, required, "unique", quizFontFamily, quizFontSize,
quizFontColour, editFontSize from fieldModels"""):
select id, modelId, ordinal, name, features, required, "unique",
quizFontFamily, quizFontSize, quizFontColour, editFontSize from fieldModels"""):
conf = dconf.copy()
(conf['rtl'],
conf['required'],
conf['unique'],
if row[1] not in mods:
mods[row[1]] = []
(conf['name'],
conf['rtl'],
conf['req'],
conf['uniq'],
conf['font'],
conf['quizSize'],
conf['quizColour'],
conf['editSize']) = row[1:]
conf['qsize'],
conf['qcol'],
conf['esize']) = row[3:]
# setup bools
conf['rtl'] = not not conf['rtl']
conf['pre'] = True
# save
db.execute("update fields set conf = ? where id = ?",
simplejson.dumps(conf), row[0])
# add to model list with ordinal for sorting
mods[row[1]].append((row[2], conf))
# now we've gathered all the info, save it into the models
for mid, fms in mods.items():
db.execute("update models set flds = ? where id = ?",
simplejson.dumps([x[1] for x in sorted(fms)]), mid)
# clean up
db.execute("drop table fieldModels")
return mods
def _migrateTemplatesTbl(db):
# do this after fieldModel migration
def _migrateTemplatesTbl(db, mods):
import anki.models
db.execute("""
insert into templates select id, modelId, ordinal, name, active, qformat,
@ -425,10 +422,11 @@ allowEmptyAnswer, typeAnswer from cardModels"""):
conf['bg'],
conf['allowEmptyAns'],
fname) = row[2:]
# convert the field name to an id
conf['typeAnswer'] = db.scalar(
"select id from fields where name = ? and mid = ?",
fname, row[1])
# convert the field name to an ordinal
for (ord, fm) in mods[row[1]]:
if fm['name'] == row[1]:
conf['typeAnswer'] = ord
break
# save
db.execute("update templates set conf = ? where id = ?",
simplejson.dumps(conf), row[0])
@ -440,7 +438,6 @@ def _rewriteModelIds(deck):
models = deck.allModels()
deck.db.execute("delete from models")
deck.db.execute("delete from templates")
deck.db.execute("delete from fields")
for c, m in enumerate(models):
old = m.id
m.id = c+1
@ -451,13 +448,6 @@ def _rewriteModelIds(deck):
t._flush()
deck.db.execute(
"update cards set tid = ? where tid = ?", t.mid, oldT)
for f in m.fields:
f.mid = m.id
oldF = f.id
f.id = None
f._flush()
deck.db.execute(
"update fdata set fmid = ? where fmid = ?", f.id, oldF)
m.flush()
deck.db.execute("update facts set mid = ? where mid = ?", m.id, old)
@ -470,20 +460,12 @@ def _postSchemaUpgrade(deck):
"revCardsDue", "revCardsRandom", "acqCardsRandom",
"acqCardsOld", "acqCardsNew"):
deck.db.execute("drop view if exists %s" % v)
# minimize qt's bold/italics/underline cruft. we made need to use lxml to
# do this properly
from anki.utils import minimizeHTML
r = [(minimizeHTML(x[2]), x[0], x[1]) for x in deck.db.execute(
"select fid, fmid, val from fdata")]
deck.db.executemany("update fdata set val = ? where fid = ? and fmid = ?",
r)
# ensure all templates use the new style field format, and update cach
# ensure all templates use the new style field format
for m in deck.allModels():
for t in m.templates:
t.qfmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.qfmt)
t.afmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.afmt)
m.flush()
m.updateCache()
# remove stats, as it's all in the revlog now
deck.db.execute("drop table if exists stats")
# suspended cards don't use ranges anymore

View file

@ -64,7 +64,7 @@ def test_factAddDelete():
assert not p
# now let's make a duplicate and test uniqueness
f2 = deck.newFact()
f2.model.fields[1].conf['required'] = True
f2.model.fields[1]['req'] = True
f2['Front'] = u"one"; f2['Back'] = u""
p = f2.problems()
assert p[0] == "unique"

View file

@ -1,7 +1,7 @@
# coding: utf-8
from tests.shared import getEmptyDeck
from anki.models import Model, Template, Field
from anki.models import Model, Template
from anki.utils import stripHTML
def test_modelDelete():
@ -20,7 +20,6 @@ def test_modelCopy():
m2 = m.copy()
assert m2.name == "Basic copy"
assert m2.id != m.id
assert m2.fields[0].id != m.fields[0].id
assert m2.templates[0].id != m.templates[0].id
assert len(m2.fields) == 2
assert len(m.fields) == 2
@ -29,24 +28,26 @@ def test_modelCopy():
assert len(m2.templates) == 2
def test_modelChange():
print "model change"
return
deck = getEmptyDeck()
m2 = deck.currentModel()
# taken from jp support plugin
m1 = Model(deck)
m1.name = "Japanese"
# field 1
fm = Field(deck)
fm.name = "Expression"
fm.conf['required'] = True
fm.conf['unique'] = True
fm = m1.newField()
fm['name'] = "Expression"
fm['req'] = True
fm['uniq'] = True
m1.addField(fm)
# field2
fm = Field(deck)
fm.name = "Meaning"
fm = m1.newField()
fm['name'] = "Meaning"
m1.addField(fm)
# field3
fm = Field(deck)
fm.name = "Reading"
fm = m1.newField()
fm['name'] = "Reading"
m1.addField(fm)
# template1
t = Template(deck)