change field storage format, improve upgrade speed

Since Anki first moved to an SQL backend, it has stored fields in a fields
table, with one field per row. This is a natural layout in a relational
database, and it had some nice properties. It meant we could retrieve an
individual field of a fact, which we used for limiting searches to a
particular field, for sorting, and for determining if a field was unique, by
adding an index on the field value.

The index was very expensive, so as part of the early work towards 2.0 I added
a checksum field instead, and added an index to that. This was a lot cheaper
than storing the entire value twice for the purpose of fast searches, but it
only partly solved the problem. We still needed an index on factId so that we
could retrieve a given fact's fields quickly. For simple models this was
fairly cheap, but as the number of fields grows the table grows very big. With
25k facts of 30 fields each, the fields table grows to 750k entries. This
makes the factId index and checksum index really expensive - with the q/a
cache removed, they make up about 30% of the deck in such a situation.

Equally problematic was sorting on those fields. Short of adding another
expensive index, a sort involves a table scan of the entire table.

We solve these problems by moving all fields into the facts table. For this to
work, we need to address some issues:

Sorting: we'll add an option to the model to specify the sort field. When
facts are modified, that field is written to a separate sort column. It can be
HTML stripped, and possibly truncated to a maximum number of letters. This
means that switching sort to a different field involves an expensive rewrite
of the sort column, but people tend to leave their sort field set to the same
value, and we don't need to clear the field if the user switches temporarily
to a non-field sort like due order. It also has the nice properties of allowing
different models to be sorted on different fields at the same time, and of
making it impossible for models to be hidden because the user has sorted on a
field which doesn't appear in some models.

Searching for words with embedded HTML: 1.2 introduced an HTML-stripped cache
of the fields' content, which both sped up searches (since we didn't have to
search the possibly large fields table), and meant we could find "bob" in
"b<b>ob</b>" quickly. The ability to quickly search for words peppered with
HTML was nice, but it meant doubling the cost of storing text in many cases,
and meant after any edit more data had to be written to the DB. Instead, we'll
do it on the fly. On this i7 computer, stripping HTML from all fields takes
1-2.6 seconds on 25-50k decks. We could possibly skip the stripping for people
who don't require it - the number of people who bold parts of words is
actually pretty small.

Duplicate detection: one option would be to fetch all fields when the add
cards dialog or editor is opened. But this will be expensive on mobile
devices. Instead, we'll create a separate table of (fid, csum), with an index
on both columns. When we edit a fact, we delete all the existing checksums for
that fact, and add checksums for any fields that must be checked as unique. We
could optionally skip the index on csum - some benchmarking is required.

As for the new table layout, creating separate columns for each field won't
scale. Instead, we store the fields in a single column, separated by the ASCII
unit separator character (0x1f). We split on that character when extracting
from the database, and join on it when writing to the DB.

Searching on a particular field in the browser will be accomplished by finding
all facts that match, and then unpacking to see if the relevant field matched.

Tags have been moved back to a separate column. Now that fields are on the
facts table, there is no need to pack them in as a field simply to avoid
another table hit.
This commit is contained in:
Damien Elmes 2011-03-10 06:28:25 +09:00
parent 59754eacb2
commit 1078285f0f
10 changed files with 218 additions and 305 deletions

View file

@ -112,10 +112,8 @@ streak=?, lapses=?, grade=?, cycles=? where id = ?""",
def _getQA(self, reload=False): def _getQA(self, reload=False):
# this is a hack at the moment # this is a hack at the moment
if not self._qa or reload: if not self._qa or reload:
self._qa = self.deck.formatQA( self._qa = self.deck.updateCache(
self.id, [self.id], "card")[0]
self.deck._cacheFacts([self.fid])[self.fid],
self.deck._cacheMeta("and c.id = %d" % self.id)[2][self.id])
return self._qa return self._qa
def fact(self): def fact(self):

View file

@ -148,6 +148,11 @@ qconf=?, conf=?, data=?""",
# unsorted # unsorted
########################################################################## ##########################################################################
def nextID(self, type):
id = self.conf.get(type, 1)
self.conf[type] = id+1
return id
def reset(self): def reset(self):
self.sched.reset() self.sched.reset()
# recache css # recache css
@ -500,14 +505,12 @@ due > :now and due < :now""", now=time.time())
ok = [] ok = []
for template in fact.model.templates: for template in fact.model.templates:
if template.active or not checkActive: if template.active or not checkActive:
# [cid, fid, qfmt, afmt, tags, model, template, group] # [cid, fid, mid, tid, gid, tags, flds, data]
meta = [None, template.qfmt, template.afmt, data = [1, 1, fact.model.id, template.id, 1,
"", "", "", ""] "", fact.joinedFields(), ""]
fields = fact.fieldsWithIds() now = self.formatQA(fact.model, template, "", data)
now = self.formatQA(None, fields, meta, False) data[6] = "\x1f".join([""]*len(fact._fields))
for k in fields.keys(): empty = self.formatQA(fact.model, template, "", data)
fields[k] = (fields[k][0], "")
empty = self.formatQA(None, fields, meta, False)
if now['q'] == empty['q']: if now['q'] == empty['q']:
continue continue
if not template.conf['allowEmptyAns']: if not template.conf['allowEmptyAns']:
@ -557,7 +560,7 @@ where fid = :fid and tid = :cmid""",
return return
strids = ids2str(ids) strids = ids2str(ids)
self.db.execute("delete from facts where id in %s" % strids) self.db.execute("delete from facts where id in %s" % strids)
self.db.execute("delete from fdata where fid in %s" % strids) #self.db.execute("delete from fdata where fid in %s" % strids)
def _deleteDanglingFacts(self): def _deleteDanglingFacts(self):
"Delete any facts without cards. Don't call this directly." "Delete any facts without cards. Don't call this directly."
@ -659,7 +662,6 @@ select id from cards where fid in (select id from facts where mid = ?)""",
# then the model # then the model
self.db.execute("delete from models where id = ?", mid) self.db.execute("delete from models where id = ?", mid)
self.db.execute("delete from templates where mid = ?", mid) self.db.execute("delete from templates where mid = ?", mid)
self.db.execute("delete from fields where mid = ?", mid)
# GUI should ensure last model is not deleted # GUI should ensure last model is not deleted
if self.conf['currentModelId'] == mid: if self.conf['currentModelId'] == mid:
self.conf['currentModelId'] = self.db.scalar( self.conf['currentModelId'] = self.db.scalar(
@ -904,7 +906,7 @@ where tid in %s""" % strids, now=time.time())
# Caches: q/a, facts.cache and fdata.csum # Caches: q/a, facts.cache and fdata.csum
########################################################################## ##########################################################################
def updateCache(self, ids, type="card"): def updateCache(self, ids=None, type="card"):
"Update cache after facts or models changed." "Update cache after facts or models changed."
# gather metadata # gather metadata
if type == "card": if type == "card":
@ -913,87 +915,62 @@ where tid in %s""" % strids, now=time.time())
where = "and f.id in " + ids2str(ids) where = "and f.id in " + ids2str(ids)
elif type == "model": elif type == "model":
where = "and m.id in " + ids2str(ids) where = "and m.id in " + ids2str(ids)
(cids, fids, meta) = self._cacheMeta(where) elif type == "all":
if not cids: where = ""
return
# and fact info
facts = self._cacheFacts(fids)
# generate q/a
pend = [self.formatQA(cids[n], facts[fids[n]], meta[cids[n]])
for n in range(len(cids))]
for p in pend:
self.media.registerText(p['q'])
self.media.registerText(p['a'])
# fact value cache
self._updateFieldCache(facts)
# and checksum
self._updateFieldChecksums(facts)
def formatQA(self, cardId, fact, meta, filters=True):
"Returns hash of id, question, answer."
d = {'id': cardId}
fields = {}
tags = None
for (k, v) in fact.items():
if k == None:
tags = v[1]
continue
fields["text:"+k] = stripHTML(v[1])
if v[1]:
fields[k] = '<span class="fm%s">%s</span>' % (
hexifyID(v[0]), v[1])
else: else:
fields[k] = u"" raise Exception()
fields['Tags'] = tags mods = {}
fields['Model'] = meta[3] templs = {}
fields['Template'] = meta[4] for m in self.allModels():
fields['Group'] = meta[5] mods[m.id] = m
for t in m.templates:
templs[t.id] = t
groups = dict(self.db.all("select id, name from groups"))
return [self.formatQA(mods[row[2]], templs[row[3]], groups[row[4]], row)
for row in self._qaData(where)]
# # and checksum
# self._updateFieldChecksums(facts)
def formatQA(self, model, template, gname, data, filters=True):
"Returns hash of id, question, answer."
# data is [cid, fid, mid, tid, gid, tags, flds, data]
# unpack fields and create dict
flist = data[6].split("\x1f")
fields = {}
for (name, (idx, conf)) in model.fieldMap().items():
fields[name] = flist[idx]
fields["text:"+name] = stripHTML(fields[name])
if fields[name]:
fields["text:"+name] = stripHTML(fields[name])
fields[name] = '<span class="fm%s-%s">%s</span>' % (
hexifyID(data[2]), hexifyID(idx), fields[name])
else:
fields["text:"+name] = ""
fields[name] = ""
fields['Tags'] = data[5]
fields['Model'] = model.name
fields['Template'] = template.name
fields['Group'] = gname
# render q & a # render q & a
for (type, format) in (("q", meta[1]), ("a", meta[2])): d = dict(id=data[0])
if filters: for (type, format) in (("q", template.qfmt), ("a", template.afmt)):
fields = runFilter("formatQA.pre", fields, meta, self) # if filters:
# fields = runFilter("formatQA.pre", fields, , self)
html = anki.template.render(format, fields) html = anki.template.render(format, fields)
if filters: # if filters:
d[type] = runFilter("formatQA.post", html, fields, meta, self) # d[type] = runFilter("formatQA.post", html, fields, meta, self)
self.media.registerText(html)
d[type] = html d[type] = html
return d return d
def _cacheMeta(self, where=""): def _qaData(self, where=""):
"Return cids, fids, and cid -> data hash." "Return [cid, fid, mid, tid, gid, tags, flds, data] db query"
# data is [fid, qfmt, afmt, model, template, group] return self.db.execute("""
meta = {} select c.id, f.id, m.id, t.id, g.id, f.tags, f.flds, f.data
cids = [] from cards c, facts f, models m, templates t, groups g
fids = [] where c.fid == f.id and f.mid == m.id and
for r in self.db.execute("""
select c.id, f.id, t.qfmt, t.afmt, m.name, t.name, g.name
from cards c, facts f, models m, templates t, groups g where
c.fid == f.id and f.mid == m.id and
c.tid = t.id and c.gid = g.id c.tid = t.id and c.gid = g.id
%s""" % where): %s""" % where)
meta[r[0]] = r[1:]
cids.append(r[0])
fids.append(r[1])
return (cids, fids, meta)
def _cacheFacts(self, ids):
"Return a hash of fid -> (name -> (id, val))."
facts = {}
for id, fields in groupby(self.db.all("""
select fdata.fid, fields.name, fields.id, fdata.val
from fdata left outer join fields on fdata.fmid = fields.id
where fdata.fid in %s order by fdata.fid""" % ids2str(ids)), itemgetter(0)):
facts[id] = dict([(f[1], f[2:]) for f in fields])
return facts
def _updateFieldCache(self, facts):
"Add stripped HTML cache for searching."
r = []
from anki.utils import stripHTMLMedia
[r.append((stripHTMLMedia(
" ".join([x[1] for x in map.values()])), id))
for (id, map) in facts.items()]
self.db.executemany(
"update facts set cache=? where id=?", r)
def _updateFieldChecksums(self, facts): def _updateFieldChecksums(self, facts):
print "benchmark updatefieldchecksums" print "benchmark updatefieldchecksums"
@ -1055,26 +1032,23 @@ insert or ignore into tags (mod, name) values (%d, :t)""" % intTime(),
self.registerTags(newTags) self.registerTags(newTags)
# find facts missing the tags # find facts missing the tags
if add: if add:
l = "val not " l = "tags not "
fn = addTags fn = addTags
else: else:
l = "val " l = "tags "
fn = deleteTags fn = deleteTags
lim = " or ".join( lim = " or ".join(
[l+"like :_%d" % c for c, t in enumerate(newTags)]) [l+"like :_%d" % c for c, t in enumerate(newTags)])
res = self.db.all( res = self.db.all(
"select fid, val from fdata where ord = -1 and " + lim, "select id, tags from facts where " + lim,
**dict([("_%d" % x, '%% %s %%' % y) for x, y in enumerate(newTags)])) **dict([("_%d" % x, '%% %s %%' % y) for x, y in enumerate(newTags)]))
# update tags # update tags
fids = [] fids = []
def fix(row): def fix(row):
fids.append(row[0]) fids.append(row[0])
return {'id': row[0], 't': fn(tags, row[1])} return {'id': row[0], 't': fn(tags, row[1]), 'n':intTime()}
self.db.executemany(""" self.db.executemany("""
update fdata set val = :t update facts set tags = :t, mod = :n where id = :id""", [fix(row) for row in res])
where fid = :id""", [fix(row) for row in res])
self.db.execute("update facts set mod = ? where id in " +
ids2str(fids), intTime())
# update q/a cache # update q/a cache
self.updateCache(fids, type="fact") self.updateCache(fids, type="fact")
self.finishProgress() self.finishProgress()

View file

@ -24,40 +24,46 @@ class Fact(object):
self.tags = "" self.tags = ""
self.cache = "" self.cache = ""
self._fields = [""] * len(self.model.fields) self._fields = [""] * len(self.model.fields)
self.data = ""
self._fmap = self.model.fieldMap() self._fmap = self.model.fieldMap()
def load(self): def load(self):
(self.mid, (self.mid,
self.crt, self.crt,
self.mod) = self.deck.db.first(""" self.mod,
select mid, crt, mod from facts where id = ?""", self.id) self.tags,
self._fields = self.deck.db.list(""" self._fields,
select val from fdata where fid = ? and fmid order by ord""", self.id) self.data) = self.deck.db.first("""
self.tags = self.deck.db.scalar(""" select mid, crt, mod, tags, flds, data from facts where id = ?""", self.id)
select val from fdata where fid = ? and ord = -1""", self.id) self._fields = self._field.split("\x1f")
self.model = self.deck.getModel(self.mid) self.model = self.deck.getModel(self.mid)
def flush(self, cache=True): def flush(self, cache=True):
self.mod = intTime() self.mod = intTime()
# facts table # facts table
self.cache = stripHTMLMedia(u" ".join(self._fields)) sfld = self._fields[self.model.sortField()]
res = self.deck.db.execute(""" res = self.deck.db.execute("""
insert or replace into facts values (?, ?, ?, ?, ?)""", insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
self.id, self.mid, self.crt, self.id, self.mid, self.crt,
self.mod, self.cache) self.mod, self.tags, self.joinedFields(),
sfld, self.data)
self.id = res.lastrowid self.id = res.lastrowid
# fdata table
self.deck.db.execute("delete from fdata where fid = ?", self.id) def joinedFields(self):
d = [] return "\x1f".join(self._fields)
for (fmid, ord, conf) in self._fmap.values():
val = self._fields[ord] # # fdata table
d.append(dict(fid=self.id, fmid=fmid, ord=ord, # self.deck.db.execute("delete from fdata where fid = ?", self.id)
val=val)) # d = []
d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags)) # for (fmid, ord, conf) in self._fmap.values():
self.deck.db.executemany(""" # val = self._fields[ord]
insert into fdata values (:fid, :fmid, :ord, :val, '')""", d) # d.append(dict(fid=self.id, fmid=fmid, ord=ord,
# media and caches # val=val))
self.deck.updateCache([self.id], "fact") # d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags))
# self.deck.db.executemany("""
# insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
# # media and caches
# self.deck.updateCache([self.id], "fact")
def cards(self): def cards(self):
return [self.deck.getCard(id) for id in self.deck.db.list( return [self.deck.getCard(id) for id in self.deck.db.list(
@ -73,12 +79,12 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
return self._fields return self._fields
def items(self): def items(self):
return [(k, self._fields[v]) return [(k, self._fields[v[0]])
for (k, v) in self._fmap.items()] for (k, v) in self._fmap.items()]
def _fieldOrd(self, key): def _fieldOrd(self, key):
try: try:
return self._fmap[key][1] return self._fmap[key][0]
except: except:
raise KeyError(key) raise KeyError(key)
@ -88,10 +94,6 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
def __setitem__(self, key, value): def __setitem__(self, key, value):
self._fields[self._fieldOrd(key)] = value self._fields[self._fieldOrd(key)] = value
def fieldsWithIds(self):
return dict(
[(k, (v[0], self[k])) for (k,v) in self._fmap.items()])
# Tags # Tags
################################################## ##################################################
@ -105,12 +107,11 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
################################################## ##################################################
def fieldUnique(self, name): def fieldUnique(self, name):
(fmid, ord, conf) = self._fmap[name] (ord, conf) = self._fmap[name]
if not conf['unique']: if not conf['uniq']:
return True return True
val = self[name] val = self[name]
csum = fieldChecksum(val) csum = fieldChecksum(val)
print "in check, ", self.id
if self.id: if self.id:
lim = "and fid != :fid" lim = "and fid != :fid"
else: else:
@ -120,18 +121,18 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
c=csum, v=val, fid=self.id) c=csum, v=val, fid=self.id)
def fieldComplete(self, name, text=None): def fieldComplete(self, name, text=None):
(fmid, ord, conf) = self._fmap[name] (ord, conf) = self._fmap[name]
if not conf['required']: if not conf['req']:
return True return True
return self[name] return self[name]
def problems(self): def problems(self):
d = [] d = []
for k in self._fmap.keys(): for (k, (ord, conf)) in self._fmap.items():
if not self.fieldUnique(k): if not self.fieldUnique(k):
d.append("unique") d.append((ord, "unique"))
elif not self.fieldComplete(k): elif not self.fieldComplete(k):
d.append("required") d.append((ord, "required"))
else: else:
d.append(None) d.append((ord, None))
return d return [x[1] for x in sorted(d)]

View file

@ -400,8 +400,7 @@ def _findCards(deck, query):
tquery += "select id from facts except " tquery += "select id from facts except "
if token == "none": if token == "none":
tquery += """ tquery += """
select id from cards where fid in (select fid from fdata where ord = -1 and select id from cards where fid in (select id from facts where tags = '')"""
val = ''"""
else: else:
token = token.replace("*", "%") token = token.replace("*", "%")
if not token.startswith("%"): if not token.startswith("%"):
@ -410,7 +409,7 @@ val = ''"""
token += " %" token += " %"
args["_tag_%d" % c] = token args["_tag_%d" % c] = token
tquery += """ tquery += """
select fid from fdata where ord = -1 and val like :_tag_%d""" % c select id from facts where tags like :_tag_%d""" % c
elif type == SEARCH_TYPE: elif type == SEARCH_TYPE:
if qquery: if qquery:
if isNeg: if isNeg:
@ -549,7 +548,7 @@ select id from cards where answer like :_ff_%d escape '\\'""" % c
token = token.replace("*", "%") token = token.replace("*", "%")
args["_ff_%d" % c] = "%"+token+"%" args["_ff_%d" % c] = "%"+token+"%"
fquery += """ fquery += """
select id from facts where cache like :_ff_%d escape '\\'""" % c select id from facts where flds like :_ff_%d escape '\\'""" % c
return (tquery, fquery, qquery, fidquery, cmquery, sfquery, return (tquery, fquery, qquery, fidquery, cmquery, sfquery,
qaquery, showdistinct, filters, args) qaquery, showdistinct, filters, args)

View file

@ -177,11 +177,7 @@ If a file with the same name exists, return a unique name."""
return unicodedata.normalize('NFD', s) return unicodedata.normalize('NFD', s)
return s return s
# generate q/a and look through all references # generate q/a and look through all references
(cids, fids, meta) = self.deck._cacheMeta() for p in self.deck.updateCache(type="all"):
facts = self.deck._cacheFacts(fids)
pend = [self.deck.formatQA(cids[n], facts[fids[n]], meta[cids[n]])
for n in range(len(cids))]
for p in pend:
for type in ("q", "a"): for type in ("q", "a"):
for f in self.mediaFiles(p[type]): for f in self.mediaFiles(p[type]):
normrefs[norm(f)] = True normrefs[norm(f)] = True

View file

@ -2,12 +2,6 @@
# Copyright: Damien Elmes <anki@ichi2.net> # Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
"""\
Models load their templates and fields when they are loaded. If you update a
template or field, you should call model.flush(), rather than trying to save
the subobject directly.
"""
import simplejson import simplejson
from anki.utils import intTime from anki.utils import intTime
from anki.lang import _ from anki.lang import _
@ -36,20 +30,21 @@ class Model(object):
def load(self): def load(self):
(self.mod, (self.mod,
self.name, self.name,
self.fields,
self.conf) = self.deck.db.first(""" self.conf) = self.deck.db.first("""
select mod, name, conf from models where id = ?""", self.id) select mod, name, flds, conf from models where id = ?""", self.id)
self.fields = simplejson.loads(self.fields)
self.conf = simplejson.loads(self.conf) self.conf = simplejson.loads(self.conf)
self.loadFields()
self.loadTemplates() self.loadTemplates()
def flush(self): def flush(self):
self.mod = intTime() self.mod = intTime()
ret = self.deck.db.execute(""" ret = self.deck.db.execute("""
insert or replace into models values (?, ?, ?, ?)""", insert or replace into models values (?, ?, ?, ?, ?)""",
self.id, self.mod, self.name, self.id, self.mod, self.name,
simplejson.dumps(self.fields),
simplejson.dumps(self.conf)) simplejson.dumps(self.conf))
self.id = ret.lastrowid self.id = ret.lastrowid
[f._flush() for f in self.fields]
[t._flush() for t in self.templates] [t._flush() for t in self.templates]
def updateCache(self): def updateCache(self):
@ -64,20 +59,19 @@ insert or replace into models values (?, ?, ?, ?)""",
# Fields # Fields
################################################## ##################################################
def loadFields(self): def newField(self):
sql = "select * from fields where mid = ? order by ord" return defaultFieldConf.copy()
self.fields = [Field(self.deck, data)
for data in self.deck.db.all(sql, self.id)]
def addField(self, field): def addField(self, field):
self.deck.modSchema() self.deck.modSchema()
field.mid = self._getID()
field.ord = len(self.fields)
self.fields.append(field) self.fields.append(field)
def fieldMap(self): def fieldMap(self):
"Mapping of field name -> (fmid, ord)." "Mapping of field name -> (ord, conf)."
return dict([(f.name, (f.id, f.ord, f.conf)) for f in self.fields]) return dict([(f['name'], (c, f)) for c, f in enumerate(self.fields)])
def sortField(self):
return 0
# Templates # Templates
################################################## ##################################################
@ -101,65 +95,33 @@ insert or replace into models values (?, ?, ?, ?)""",
new = Model(self.deck, self.id) new = Model(self.deck, self.id)
new.id = None new.id = None
new.name += _(" copy") new.name += _(" copy")
new.fields = [f.copy() for f in self.fields]
# get new id # get new id
f = new.fields; new.fields = []
t = new.templates; new.templates = [] t = new.templates; new.templates = []
new.flush() new.flush()
# then put back # then put back
new.fields = f
new.templates = t new.templates = t
for f in new.fields:
f.id = None
f.mid = new.id
f._flush()
for t in new.templates: for t in new.templates:
t.id = None t.id = None
t.mid = new.id t.mid = new.id
t._flush() t._flush()
return new return new
# Field model object # Field object
########################################################################## ##########################################################################
defaultFieldConf = { defaultFieldConf = {
'rtl': False, # features 'name': "",
'required': False, 'rtl': False,
'unique': False, 'req': False,
'uniq': False,
'font': "Arial", 'font': "Arial",
'quizSize': 20, 'qsize': 20,
'editSize': 20, 'esize': 20,
'quizColour': "#fff", 'qcol': "#fff",
'pre': True, 'pre': True,
} }
class Field(object):
def __init__(self, deck, data=None):
self.deck = deck
if data:
self.initFromData(data)
else:
self.id = None
self.numeric = 0
self.conf = defaultFieldConf.copy()
def initFromData(self, data):
(self.id,
self.mid,
self.ord,
self.name,
self.numeric,
self.conf) = data
self.conf = simplejson.loads(self.conf)
def _flush(self):
ret = self.deck.db.execute("""
insert or replace into fields values (?, ?, ?, ?, ?, ?)""",
self.id, self.mid, self.ord,
self.name, self.numeric,
simplejson.dumps(self.conf))
self.id = ret.lastrowid
# Template object # Template object
########################################################################## ##########################################################################

View file

@ -2,7 +2,7 @@
# Copyright: Damien Elmes <anki@ichi2.net> # Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
from anki.models import Model, Template, Field from anki.models import Model, Template
from anki.lang import _ from anki.lang import _
models = [] models = []
@ -13,13 +13,13 @@ models = []
def BasicModel(deck): def BasicModel(deck):
m = Model(deck) m = Model(deck)
m.name = _("Basic") m.name = _("Basic")
fm = Field(deck) fm = m.newField()
fm.name = _("Front") fm['name'] = _("Front")
fm.conf['required'] = True fm['req'] = True
fm.conf['unique'] = True fm['uniq'] = True
m.addField(fm) m.addField(fm)
fm = Field(deck) fm = m.newField()
fm.name = _("Back") fm['name'] = _("Back")
m.addField(fm) m.addField(fm)
t = Template(deck) t = Template(deck)
t.name = _("Forward") t.name = _("Forward")

View file

@ -87,22 +87,17 @@ create table if not exists facts (
mid integer not null, mid integer not null,
crt integer not null, crt integer not null,
mod integer not null, mod integer not null,
cache text not null tags text not null,
flds text not null,
sfld text not null,
data text not null
); );
create table if not exists models ( create table if not exists models (
id integer primary key, id integer primary key,
mod integer not null, mod integer not null,
name text not null, name text not null,
conf text not null flds text not null,
);
create table if not exists fields (
id integer primary key,
mid integer not null,
ord integer not null,
name text not null,
numeric integer not null,
conf text not null conf text not null
); );
@ -117,14 +112,6 @@ create table if not exists templates (
conf text not null conf text not null
); );
create table if not exists fdata (
fid integer not null,
fmid integer not null,
ord integer not null,
val text not null,
csum text not null
);
create table if not exists gconf ( create table if not exists gconf (
id integer primary key, id integer primary key,
mod integer not null, mod integer not null,
@ -190,9 +177,6 @@ create index if not exists ix_cards_mod on cards (mod);
create index if not exists ix_facts_mod on facts (mod); create index if not exists ix_facts_mod on facts (mod);
-- card spacing, etc -- card spacing, etc
create index if not exists ix_cards_fid on cards (fid); create index if not exists ix_cards_fid on cards (fid);
-- fact data
create index if not exists ix_fdata_fid on fdata (fid);
create index if not exists ix_fdata_csum on fdata (csum);
-- revlog by card -- revlog by card
create index if not exists ix_revlog_cid on revlog (cid); create index if not exists ix_revlog_cid on revlog (cid);
-- media -- media
@ -204,10 +188,16 @@ create index if not exists ix_media_csum on media (csum);
# we don't have access to the progress handler at this point, so the GUI code # we don't have access to the progress handler at this point, so the GUI code
# will need to set up a progress handling window before opening a deck. # will need to set up a progress handling window before opening a deck.
def _moveTable(db, table, insExtra=""): def _moveTable(db, table, cards=False):
if cards:
insExtra = " order by created"
else:
insExtra = ""
sql = db.scalar( sql = db.scalar(
"select sql from sqlite_master where name = '%s'" % table) "select sql from sqlite_master where name = '%s'" % table)
sql = sql.replace("TABLE "+table, "temporary table %s2" % table) sql = sql.replace("TABLE "+table, "temporary table %s2" % table)
if cards:
sql = sql.replace("PRIMARY KEY (id),", "")
db.execute(sql) db.execute(sql)
db.execute("insert into %s2 select * from %s%s" % (table, table, insExtra)) db.execute("insert into %s2 select * from %s%s" % (table, table, insExtra))
db.execute("drop table "+table) db.execute("drop table "+table)
@ -244,7 +234,7 @@ def _upgradeSchema(db):
# cards # cards
########### ###########
# move into temp table # move into temp table
_moveTable(db, "cards", " order by created") _moveTable(db, "cards", True)
# use the new order to rewrite card ids # use the new order to rewrite card ids
map = dict(db.all("select id, rowid from cards2")) map = dict(db.all("select id, rowid from cards2"))
_insertWithIdChange(db, map, 0, "reviewHistory", 12) _insertWithIdChange(db, map, 0, "reviewHistory", 12)
@ -274,26 +264,36 @@ when trim(tags) == "" then ""
else " " || replace(replace(trim(tags), ",", " "), " ", " ") || " " else " " || replace(replace(trim(tags), ",", " "), " ", " ") || " "
end) end)
""") """)
# we store them as fields now # pull facts into memory, so we can merge them with fields efficiently
db.execute("insert into fields select null, id, 0, -1, tags from facts") facts = db.all("""
# put facts in a temporary table, sorted by created select id, modelId, cast(created as int), cast(modified as int), tags
db.execute("""
create table facts2
(id, modelId, created, modified, cache)""")
db.execute("""
insert into facts2 select id, modelId, created, modified, spaceUntil
from facts order by created""") from facts order by created""")
# use the new order to rewrite fact ids # build field hash
map = dict(db.all("select id, rowid from facts2")) fields = {}
_insertWithIdChange(db, map, 1, "fields", 5) for (fid, ord, val) in db.execute(
"select factId, ordinal, value from fields order by factId, ordinal"):
if fid not in fields:
fields[fid] = []
fields[fid].append((ord, val))
# build insert data and transform ids, and minimize qt's
# bold/italics/underline cruft.
map = {}
data = []
from anki.utils import minimizeHTML
for c, row in enumerate(facts):
oldid = row[0]
map[oldid] = c+1
row = list(row)
row[0] = c+1
row.append(minimizeHTML("\x1f".join([x[1] for x in sorted(fields[oldid])])))
data.append(row)
# use the new order to rewrite fact ids in cards table
_insertWithIdChange(db, map, 1, "cards", 18) _insertWithIdChange(db, map, 1, "cards", 18)
# and put the facts into the new table # and put the facts into the new table
db.execute("drop table facts") db.execute("drop table facts")
_addSchema(db, False) _addSchema(db, False)
db.execute(""" db.executemany("insert into facts values (?,?,?,?,?,?,'','')", data)
insert or ignore into facts select rowid, modelId, db.execute("drop table fields")
cast(created as int), cast(modified as int), cache from facts2""")
db.execute("drop table facts2")
# media # media
########### ###########
@ -303,19 +303,12 @@ insert or ignore into media select filename, cast(created as int),
originalPath from media2""") originalPath from media2""")
db.execute("drop table media2") db.execute("drop table media2")
# fields -> fdata
###########
db.execute("""
insert into fdata select factId, fieldModelId, ordinal, value, ''
from fields order by factId, ordinal""")
db.execute("drop table fields")
# models # models
########### ###########
_moveTable(db, "models") _moveTable(db, "models")
db.execute(""" db.execute("""
insert into models select id, cast(modified as int), insert into models select id, cast(modified as int),
name, "{}" from models2""") name, "{}", "{}" from models2""")
db.execute("drop table models2") db.execute("drop table models2")
# reviewHistory -> revlog # reviewHistory -> revlog
@ -333,8 +326,8 @@ cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int),
# longer migrations # longer migrations
########### ###########
_migrateDeckTbl(db) _migrateDeckTbl(db)
_migrateFieldsTbl(db) mods = _migrateFieldsTbl(db)
_migrateTemplatesTbl(db) _migrateTemplatesTbl(db, mods)
_updateIndices(db) _updateIndices(db)
return ver return ver
@ -385,32 +378,36 @@ utcOffset, "", "", "" from decks""", t=intTime())
def _migrateFieldsTbl(db): def _migrateFieldsTbl(db):
import anki.models import anki.models
db.execute("""
insert into fields select id, modelId, ordinal, name, numeric, ''
from fieldModels""")
dconf = anki.models.defaultFieldConf dconf = anki.models.defaultFieldConf
mods = {}
for row in db.all(""" for row in db.all("""
select id, features, required, "unique", quizFontFamily, quizFontSize, select id, modelId, ordinal, name, features, required, "unique",
quizFontColour, editFontSize from fieldModels"""): quizFontFamily, quizFontSize, quizFontColour, editFontSize from fieldModels"""):
conf = dconf.copy() conf = dconf.copy()
(conf['rtl'], if row[1] not in mods:
conf['required'], mods[row[1]] = []
conf['unique'], (conf['name'],
conf['rtl'],
conf['req'],
conf['uniq'],
conf['font'], conf['font'],
conf['quizSize'], conf['qsize'],
conf['quizColour'], conf['qcol'],
conf['editSize']) = row[1:] conf['esize']) = row[3:]
# setup bools # setup bools
conf['rtl'] = not not conf['rtl'] conf['rtl'] = not not conf['rtl']
conf['pre'] = True conf['pre'] = True
# save # add to model list with ordinal for sorting
db.execute("update fields set conf = ? where id = ?", mods[row[1]].append((row[2], conf))
simplejson.dumps(conf), row[0]) # now we've gathered all the info, save it into the models
for mid, fms in mods.items():
db.execute("update models set flds = ? where id = ?",
simplejson.dumps([x[1] for x in sorted(fms)]), mid)
# clean up # clean up
db.execute("drop table fieldModels") db.execute("drop table fieldModels")
return mods
def _migrateTemplatesTbl(db): def _migrateTemplatesTbl(db, mods):
# do this after fieldModel migration
import anki.models import anki.models
db.execute(""" db.execute("""
insert into templates select id, modelId, ordinal, name, active, qformat, insert into templates select id, modelId, ordinal, name, active, qformat,
@ -425,10 +422,11 @@ allowEmptyAnswer, typeAnswer from cardModels"""):
conf['bg'], conf['bg'],
conf['allowEmptyAns'], conf['allowEmptyAns'],
fname) = row[2:] fname) = row[2:]
# convert the field name to an id # convert the field name to an ordinal
conf['typeAnswer'] = db.scalar( for (ord, fm) in mods[row[1]]:
"select id from fields where name = ? and mid = ?", if fm['name'] == row[1]:
fname, row[1]) conf['typeAnswer'] = ord
break
# save # save
db.execute("update templates set conf = ? where id = ?", db.execute("update templates set conf = ? where id = ?",
simplejson.dumps(conf), row[0]) simplejson.dumps(conf), row[0])
@ -440,7 +438,6 @@ def _rewriteModelIds(deck):
models = deck.allModels() models = deck.allModels()
deck.db.execute("delete from models") deck.db.execute("delete from models")
deck.db.execute("delete from templates") deck.db.execute("delete from templates")
deck.db.execute("delete from fields")
for c, m in enumerate(models): for c, m in enumerate(models):
old = m.id old = m.id
m.id = c+1 m.id = c+1
@ -451,13 +448,6 @@ def _rewriteModelIds(deck):
t._flush() t._flush()
deck.db.execute( deck.db.execute(
"update cards set tid = ? where tid = ?", t.mid, oldT) "update cards set tid = ? where tid = ?", t.mid, oldT)
for f in m.fields:
f.mid = m.id
oldF = f.id
f.id = None
f._flush()
deck.db.execute(
"update fdata set fmid = ? where fmid = ?", f.id, oldF)
m.flush() m.flush()
deck.db.execute("update facts set mid = ? where mid = ?", m.id, old) deck.db.execute("update facts set mid = ? where mid = ?", m.id, old)
@ -470,20 +460,12 @@ def _postSchemaUpgrade(deck):
"revCardsDue", "revCardsRandom", "acqCardsRandom", "revCardsDue", "revCardsRandom", "acqCardsRandom",
"acqCardsOld", "acqCardsNew"): "acqCardsOld", "acqCardsNew"):
deck.db.execute("drop view if exists %s" % v) deck.db.execute("drop view if exists %s" % v)
# minimize qt's bold/italics/underline cruft. we may need to use lxml to # ensure all templates use the new style field format
# do this properly
from anki.utils import minimizeHTML
r = [(minimizeHTML(x[2]), x[0], x[1]) for x in deck.db.execute(
"select fid, fmid, val from fdata")]
deck.db.executemany("update fdata set val = ? where fid = ? and fmid = ?",
r)
# ensure all templates use the new style field format, and update cache
for m in deck.allModels(): for m in deck.allModels():
for t in m.templates: for t in m.templates:
t.qfmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.qfmt) t.qfmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.qfmt)
t.afmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.afmt) t.afmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.afmt)
m.flush() m.flush()
m.updateCache()
# remove stats, as it's all in the revlog now # remove stats, as it's all in the revlog now
deck.db.execute("drop table if exists stats") deck.db.execute("drop table if exists stats")
# suspended cards don't use ranges anymore # suspended cards don't use ranges anymore

View file

@ -64,7 +64,7 @@ def test_factAddDelete():
assert not p assert not p
# now let's make a duplicate and test uniqueness # now let's make a duplicate and test uniqueness
f2 = deck.newFact() f2 = deck.newFact()
f2.model.fields[1].conf['required'] = True f2.model.fields[1]['req'] = True
f2['Front'] = u"one"; f2['Back'] = u"" f2['Front'] = u"one"; f2['Back'] = u""
p = f2.problems() p = f2.problems()
assert p[0] == "unique" assert p[0] == "unique"

View file

@ -1,7 +1,7 @@
# coding: utf-8 # coding: utf-8
from tests.shared import getEmptyDeck from tests.shared import getEmptyDeck
from anki.models import Model, Template, Field from anki.models import Model, Template
from anki.utils import stripHTML from anki.utils import stripHTML
def test_modelDelete(): def test_modelDelete():
@ -20,7 +20,6 @@ def test_modelCopy():
m2 = m.copy() m2 = m.copy()
assert m2.name == "Basic copy" assert m2.name == "Basic copy"
assert m2.id != m.id assert m2.id != m.id
assert m2.fields[0].id != m.fields[0].id
assert m2.templates[0].id != m.templates[0].id assert m2.templates[0].id != m.templates[0].id
assert len(m2.fields) == 2 assert len(m2.fields) == 2
assert len(m.fields) == 2 assert len(m.fields) == 2
@ -29,24 +28,26 @@ def test_modelCopy():
assert len(m2.templates) == 2 assert len(m2.templates) == 2
def test_modelChange(): def test_modelChange():
print "model change"
return
deck = getEmptyDeck() deck = getEmptyDeck()
m2 = deck.currentModel() m2 = deck.currentModel()
# taken from jp support plugin # taken from jp support plugin
m1 = Model(deck) m1 = Model(deck)
m1.name = "Japanese" m1.name = "Japanese"
# field 1 # field 1
fm = Field(deck) fm = m1.newField()
fm.name = "Expression" fm['name'] = "Expression"
fm.conf['required'] = True fm['req'] = True
fm.conf['unique'] = True fm['uniq'] = True
m1.addField(fm) m1.addField(fm)
# field2 # field2
fm = Field(deck) fm = m1.newField()
fm.name = "Meaning" fm['name'] = "Meaning"
m1.addField(fm) m1.addField(fm)
# field3 # field3
fm = Field(deck) fm = m1.newField()
fm.name = "Reading" fm['name'] = "Reading"
m1.addField(fm) m1.addField(fm)
# template1 # template1
t = Template(deck) t = Template(deck)