From 1078285f0feadfa6bc2801c9666493381165940a Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Thu, 10 Mar 2011 06:28:25 +0900 Subject: [PATCH] change field storage format, improve upgrade speed Since Anki first moved to an SQL backend, it has stored fields in a fields table, with one field per row. This is a natural layout in a relational database, and it had some nice properties. It meant we could retrieve an individual field of a fact, which we used for limiting searches to a particular field, for sorting, and for determining if a field was unique, by adding an index on the field value. The index was very expensive, so as part of the early work towards 2.0 I added a checksum field instead, and added an index to that. This was a lot cheaper than storing the entire value twice for the purpose of fast searches, but it only partly solved the problem. We still needed an index on factId so that we could retrieve a given fact's fields quickly. For simple models this was fairly cheap, but as the number of fields grows the table grows very big. With 25k facts of 30 fields each, the fields table grows to 750k entries. This makes the factId index and checksum index really expensive - with the q/a cache removed, they make up about 30% of the deck in such a situation. Equally problematic was sorting on those fields. Short of adding another expensive index, a sort involves a scan of the entire table. We solve these problems by moving all fields into the facts table. For this to work, we need to address some issues: Sorting: we'll add an option to the model to specify the sort field. When facts are modified, that field is written to a separate sort column. It can be HTML stripped, and possibly truncated to a maximum number of letters. 
This means that switching sort to a different field involves an expensive rewrite of the sort column, but people tend to leave their sort field set to the same value, and we don't need to clear the field if the user switches temporarily to a non-field sort like due order. It also has the nice properties of allowing different models to be sorted on different columns at the same time, and of making it impossible for models to be hidden because the user has sorted on a field which doesn't appear in some models. Searching for words with embedded HTML: 1.2 introduced an HTML-stripped cache of the fields' content, which both sped up searches (since we didn't have to search the possibly large fields table), and meant we could find "bob" in "b<b>ob</b>" quickly. The ability to quickly search for words peppered with HTML was nice, but it meant doubling the cost of storing text in many cases, and meant that after any edit more data had to be written to the DB. Instead, we'll do it on the fly. On this i7 computer, stripping HTML from all fields takes 1-2.6 seconds on 25-50k decks. We could possibly skip the stripping for people who don't require it - the number of people who bold parts of words is actually pretty small. Duplicate detection: one option would be to fetch all fields when the add cards dialog or editor is opened. But this will be expensive on mobile devices. Instead, we'll create a separate table of (fid, csum), with an index on both columns. When we edit a fact, we delete all the existing checksums for that fact, and add checksums for any fields that must be checked as unique. We could optionally skip the index on csum - some benchmarking is required. As for the new table layout, creating separate columns for each field won't scale. Instead, we store the fields in a single column, separated by the ASCII unit separator character (0x1f). We split on that character when extracting from the database, and join on it when writing to the DB. 
Searching on a particular field in the browser will be accomplished by finding all facts that match, and then unpacking to see if the relevant field matched. Tags have been moved back to a separate column. Now that fields are on the facts table, there is no need to pack them in as a field simply to avoid another table hit. --- anki/cards.py | 6 +- anki/deck.py | 154 ++++++++++++++++++------------------------- anki/facts.py | 77 +++++++++++----------- anki/find.py | 7 +- anki/media.py | 6 +- anki/models.py | 84 +++++++---------------- anki/stdmodels.py | 14 ++-- anki/storage.py | 152 +++++++++++++++++++----------------------- tests/test_deck.py | 2 +- tests/test_models.py | 21 +++--- 10 files changed, 218 insertions(+), 305 deletions(-) diff --git a/anki/cards.py b/anki/cards.py index c94cd14ee..860be6126 100644 --- a/anki/cards.py +++ b/anki/cards.py @@ -112,10 +112,8 @@ streak=?, lapses=?, grade=?, cycles=? where id = ?""", def _getQA(self, reload=False): # this is a hack at the moment if not self._qa or reload: - self._qa = self.deck.formatQA( - self.id, - self.deck._cacheFacts([self.fid])[self.fid], - self.deck._cacheMeta("and c.id = %d" % self.id)[2][self.id]) + self._qa = self.deck.updateCache( + [self.id], "card")[0] return self._qa def fact(self): diff --git a/anki/deck.py b/anki/deck.py index 84801de8d..becaa89cd 100644 --- a/anki/deck.py +++ b/anki/deck.py @@ -148,6 +148,11 @@ qconf=?, conf=?, data=?""", # unsorted ########################################################################## + def nextID(self, type): + id = self.conf.get(type, 1) + self.conf[type] = id+1 + return id + def reset(self): self.sched.reset() # recache css @@ -500,14 +505,12 @@ due > :now and due < :now""", now=time.time()) ok = [] for template in fact.model.templates: if template.active or not checkActive: - # [cid, fid, qfmt, afmt, tags, model, template, group] - meta = [None, template.qfmt, template.afmt, - "", "", "", ""] - fields = fact.fieldsWithIds() - now = 
self.formatQA(None, fields, meta, False) - for k in fields.keys(): - fields[k] = (fields[k][0], "") - empty = self.formatQA(None, fields, meta, False) + # [cid, fid, mid, tid, gid, tags, flds, data] + data = [1, 1, fact.model.id, template.id, 1, + "", fact.joinedFields(), ""] + now = self.formatQA(fact.model, template, "", data) + data[6] = "\x1f".join([""]*len(fact._fields)) + empty = self.formatQA(fact.model, template, "", data) if now['q'] == empty['q']: continue if not template.conf['allowEmptyAns']: @@ -557,7 +560,7 @@ where fid = :fid and tid = :cmid""", return strids = ids2str(ids) self.db.execute("delete from facts where id in %s" % strids) - self.db.execute("delete from fdata where fid in %s" % strids) + #self.db.execute("delete from fdata where fid in %s" % strids) def _deleteDanglingFacts(self): "Delete any facts without cards. Don't call this directly." @@ -659,7 +662,6 @@ select id from cards where fid in (select id from facts where mid = ?)""", # then the model self.db.execute("delete from models where id = ?", mid) self.db.execute("delete from templates where mid = ?", mid) - self.db.execute("delete from fields where mid = ?", mid) # GUI should ensure last model is not deleted if self.conf['currentModelId'] == mid: self.conf['currentModelId'] = self.db.scalar( @@ -904,7 +906,7 @@ where tid in %s""" % strids, now=time.time()) # Caches: q/a, facts.cache and fdata.csum ########################################################################## - def updateCache(self, ids, type="card"): + def updateCache(self, ids=None, type="card"): "Update cache after facts or models changed." 
# gather metadata if type == "card": @@ -913,87 +915,62 @@ where tid in %s""" % strids, now=time.time()) where = "and f.id in " + ids2str(ids) elif type == "model": where = "and m.id in " + ids2str(ids) - (cids, fids, meta) = self._cacheMeta(where) - if not cids: - return - # and fact info - facts = self._cacheFacts(fids) - # generate q/a - pend = [self.formatQA(cids[n], facts[fids[n]], meta[cids[n]]) - for n in range(len(cids))] - for p in pend: - self.media.registerText(p['q']) - self.media.registerText(p['a']) - # fact value cache - self._updateFieldCache(facts) - # and checksum - self._updateFieldChecksums(facts) + elif type == "all": + where = "" + else: + raise Exception() + mods = {} + templs = {} + for m in self.allModels(): + mods[m.id] = m + for t in m.templates: + templs[t.id] = t + groups = dict(self.db.all("select id, name from groups")) + return [self.formatQA(mods[row[2]], templs[row[3]], groups[row[4]], row) + for row in self._qaData(where)] + # # and checksum + # self._updateFieldChecksums(facts) - def formatQA(self, cardId, fact, meta, filters=True): + def formatQA(self, model, template, gname, data, filters=True): "Returns hash of id, question, answer." 
- d = {'id': cardId} + # data is [cid, fid, mid, tid, gid, tags, flds, data] + # unpack fields and create dict + flist = data[6].split("\x1f") fields = {} - tags = None - for (k, v) in fact.items(): - if k == None: - tags = v[1] - continue - fields["text:"+k] = stripHTML(v[1]) - if v[1]: - fields[k] = '%s' % ( - hexifyID(v[0]), v[1]) + for (name, (idx, conf)) in model.fieldMap().items(): + fields[name] = flist[idx] + fields["text:"+name] = stripHTML(fields[name]) + if fields[name]: + fields["text:"+name] = stripHTML(fields[name]) + fields[name] = '%s' % ( + hexifyID(data[2]), hexifyID(idx), fields[name]) else: - fields[k] = u"" - fields['Tags'] = tags - fields['Model'] = meta[3] - fields['Template'] = meta[4] - fields['Group'] = meta[5] + fields["text:"+name] = "" + fields[name] = "" + fields['Tags'] = data[5] + fields['Model'] = model.name + fields['Template'] = template.name + fields['Group'] = gname # render q & a - for (type, format) in (("q", meta[1]), ("a", meta[2])): - if filters: - fields = runFilter("formatQA.pre", fields, meta, self) + d = dict(id=data[0]) + for (type, format) in (("q", template.qfmt), ("a", template.afmt)): + # if filters: + # fields = runFilter("formatQA.pre", fields, , self) html = anki.template.render(format, fields) - if filters: - d[type] = runFilter("formatQA.post", html, fields, meta, self) + # if filters: + # d[type] = runFilter("formatQA.post", html, fields, meta, self) + self.media.registerText(html) d[type] = html return d - def _cacheMeta(self, where=""): - "Return cids, fids, and cid -> data hash." 
- # data is [fid, qfmt, afmt, model, template, group] - meta = {} - cids = [] - fids = [] - for r in self.db.execute(""" -select c.id, f.id, t.qfmt, t.afmt, m.name, t.name, g.name -from cards c, facts f, models m, templates t, groups g where -c.fid == f.id and f.mid == m.id and + def _qaData(self, where=""): + "Return [cid, fid, mid, tid, gid, tags, flds, data] db query" + return self.db.execute(""" +select c.id, f.id, m.id, t.id, g.id, f.tags, f.flds, f.data +from cards c, facts f, models m, templates t, groups g +where c.fid == f.id and f.mid == m.id and c.tid = t.id and c.gid = g.id -%s""" % where): - meta[r[0]] = r[1:] - cids.append(r[0]) - fids.append(r[1]) - return (cids, fids, meta) - - def _cacheFacts(self, ids): - "Return a hash of fid -> (name -> (id, val))." - facts = {} - for id, fields in groupby(self.db.all(""" -select fdata.fid, fields.name, fields.id, fdata.val -from fdata left outer join fields on fdata.fmid = fields.id -where fdata.fid in %s order by fdata.fid""" % ids2str(ids)), itemgetter(0)): - facts[id] = dict([(f[1], f[2:]) for f in fields]) - return facts - - def _updateFieldCache(self, facts): - "Add stripped HTML cache for searching." - r = [] - from anki.utils import stripHTMLMedia - [r.append((stripHTMLMedia( - " ".join([x[1] for x in map.values()])), id)) - for (id, map) in facts.items()] - self.db.executemany( - "update facts set cache=? 
where id=?", r) +%s""" % where) def _updateFieldChecksums(self, facts): print "benchmark updatefieldchecksums" @@ -1055,26 +1032,23 @@ insert or ignore into tags (mod, name) values (%d, :t)""" % intTime(), self.registerTags(newTags) # find facts missing the tags if add: - l = "val not " + l = "tags not " fn = addTags else: - l = "val " + l = "tags " fn = deleteTags lim = " or ".join( [l+"like :_%d" % c for c, t in enumerate(newTags)]) res = self.db.all( - "select fid, val from fdata where ord = -1 and " + lim, + "select id, tags from facts where " + lim, **dict([("_%d" % x, '%% %s %%' % y) for x, y in enumerate(newTags)])) # update tags fids = [] def fix(row): fids.append(row[0]) - return {'id': row[0], 't': fn(tags, row[1])} + return {'id': row[0], 't': fn(tags, row[1]), 'n':intTime()} self.db.executemany(""" -update fdata set val = :t -where fid = :id""", [fix(row) for row in res]) - self.db.execute("update facts set mod = ? where id in " + - ids2str(fids), intTime()) +update facts set tags = :t, mod = :n where id = :id""", [fix(row) for row in res]) # update q/a cache self.updateCache(fids, type="fact") self.finishProgress() diff --git a/anki/facts.py b/anki/facts.py index 605fbf08d..93860d83c 100644 --- a/anki/facts.py +++ b/anki/facts.py @@ -24,40 +24,46 @@ class Fact(object): self.tags = "" self.cache = "" self._fields = [""] * len(self.model.fields) + self.data = "" self._fmap = self.model.fieldMap() def load(self): (self.mid, self.crt, - self.mod) = self.deck.db.first(""" -select mid, crt, mod from facts where id = ?""", self.id) - self._fields = self.deck.db.list(""" -select val from fdata where fid = ? and fmid order by ord""", self.id) - self.tags = self.deck.db.scalar(""" -select val from fdata where fid = ? 
and ord = -1""", self.id) + self.mod, + self.tags, + self._fields, + self.data) = self.deck.db.first(""" +select mid, crt, mod, tags, flds, data from facts where id = ?""", self.id) + self._fields = self._field.split("\x1f") self.model = self.deck.getModel(self.mid) def flush(self, cache=True): self.mod = intTime() # facts table - self.cache = stripHTMLMedia(u" ".join(self._fields)) + sfld = self._fields[self.model.sortField()] res = self.deck.db.execute(""" -insert or replace into facts values (?, ?, ?, ?, ?)""", - self.id, self.mid, self.crt, - self.mod, self.cache) +insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""", + self.id, self.mid, self.crt, + self.mod, self.tags, self.joinedFields(), + sfld, self.data) self.id = res.lastrowid - # fdata table - self.deck.db.execute("delete from fdata where fid = ?", self.id) - d = [] - for (fmid, ord, conf) in self._fmap.values(): - val = self._fields[ord] - d.append(dict(fid=self.id, fmid=fmid, ord=ord, - val=val)) - d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags)) - self.deck.db.executemany(""" -insert into fdata values (:fid, :fmid, :ord, :val, '')""", d) - # media and caches - self.deck.updateCache([self.id], "fact") + + def joinedFields(self): + return "\x1f".join(self._fields) + +# # fdata table +# self.deck.db.execute("delete from fdata where fid = ?", self.id) +# d = [] +# for (fmid, ord, conf) in self._fmap.values(): +# val = self._fields[ord] +# d.append(dict(fid=self.id, fmid=fmid, ord=ord, +# val=val)) +# d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags)) +# self.deck.db.executemany(""" +# insert into fdata values (:fid, :fmid, :ord, :val, '')""", d) +# # media and caches +# self.deck.updateCache([self.id], "fact") def cards(self): return [self.deck.getCard(id) for id in self.deck.db.list( @@ -73,12 +79,12 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d) return self._fields def items(self): - return [(k, self._fields[v]) + return [(k, self._fields[v[0]]) for (k, v) 
in self._fmap.items()] def _fieldOrd(self, key): try: - return self._fmap[key][1] + return self._fmap[key][0] except: raise KeyError(key) @@ -88,10 +94,6 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d) def __setitem__(self, key, value): self._fields[self._fieldOrd(key)] = value - def fieldsWithIds(self): - return dict( - [(k, (v[0], self[k])) for (k,v) in self._fmap.items()]) - # Tags ################################################## @@ -105,12 +107,11 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d) ################################################## def fieldUnique(self, name): - (fmid, ord, conf) = self._fmap[name] - if not conf['unique']: + (ord, conf) = self._fmap[name] + if not conf['uniq']: return True val = self[name] csum = fieldChecksum(val) - print "in check, ", self.id if self.id: lim = "and fid != :fid" else: @@ -120,18 +121,18 @@ insert into fdata values (:fid, :fmid, :ord, :val, '')""", d) c=csum, v=val, fid=self.id) def fieldComplete(self, name, text=None): - (fmid, ord, conf) = self._fmap[name] - if not conf['required']: + (ord, conf) = self._fmap[name] + if not conf['req']: return True return self[name] def problems(self): d = [] - for k in self._fmap.keys(): + for (k, (ord, conf)) in self._fmap.items(): if not self.fieldUnique(k): - d.append("unique") + d.append((ord, "unique")) elif not self.fieldComplete(k): - d.append("required") + d.append((ord, "required")) else: - d.append(None) - return d + d.append((ord, None)) + return [x[1] for x in sorted(d)] diff --git a/anki/find.py b/anki/find.py index 4950ef322..3923df13c 100644 --- a/anki/find.py +++ b/anki/find.py @@ -400,8 +400,7 @@ def _findCards(deck, query): tquery += "select id from facts except " if token == "none": tquery += """ -select id from cards where fid in (select fid from fdata where ord = -1 and -val = ''""" +select id from cards where fid in (select id from facts where tags = '')""" else: token = token.replace("*", "%") if not 
token.startswith("%"): @@ -410,7 +409,7 @@ val = ''""" token += " %" args["_tag_%d" % c] = token tquery += """ -select fid from fdata where ord = -1 and val like :_tag_%d""" % c +select id from facts where tags like :_tag_%d""" % c elif type == SEARCH_TYPE: if qquery: if isNeg: @@ -549,7 +548,7 @@ select id from cards where answer like :_ff_%d escape '\\'""" % c token = token.replace("*", "%") args["_ff_%d" % c] = "%"+token+"%" fquery += """ -select id from facts where cache like :_ff_%d escape '\\'""" % c +select id from facts where flds like :_ff_%d escape '\\'""" % c return (tquery, fquery, qquery, fidquery, cmquery, sfquery, qaquery, showdistinct, filters, args) diff --git a/anki/media.py b/anki/media.py index 6472dfeff..053deda70 100644 --- a/anki/media.py +++ b/anki/media.py @@ -177,11 +177,7 @@ If a file with the same name exists, return a unique name.""" return unicodedata.normalize('NFD', s) return s # generate q/a and look through all references - (cids, fids, meta) = self.deck._cacheMeta() - facts = self.deck._cacheFacts(fids) - pend = [self.deck.formatQA(cids[n], facts[fids[n]], meta[cids[n]]) - for n in range(len(cids))] - for p in pend: + for p in self.deck.updateCache(type="all"): for type in ("q", "a"): for f in self.mediaFiles(p[type]): normrefs[norm(f)] = True diff --git a/anki/models.py b/anki/models.py index ecafdc01e..31813218c 100644 --- a/anki/models.py +++ b/anki/models.py @@ -2,12 +2,6 @@ # Copyright: Damien Elmes # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html -"""\ -Models load their templates and fields when they are loaded. If you update a -template or field, you should call model.flush(), rather than trying to save -the subobject directly. 
-""" - import simplejson from anki.utils import intTime from anki.lang import _ @@ -36,20 +30,21 @@ class Model(object): def load(self): (self.mod, self.name, + self.fields, self.conf) = self.deck.db.first(""" -select mod, name, conf from models where id = ?""", self.id) +select mod, name, flds, conf from models where id = ?""", self.id) + self.fields = simplejson.loads(self.fields) self.conf = simplejson.loads(self.conf) - self.loadFields() self.loadTemplates() def flush(self): self.mod = intTime() ret = self.deck.db.execute(""" -insert or replace into models values (?, ?, ?, ?)""", - self.id, self.mod, self.name, - simplejson.dumps(self.conf)) +insert or replace into models values (?, ?, ?, ?, ?)""", + self.id, self.mod, self.name, + simplejson.dumps(self.fields), + simplejson.dumps(self.conf)) self.id = ret.lastrowid - [f._flush() for f in self.fields] [t._flush() for t in self.templates] def updateCache(self): @@ -64,20 +59,19 @@ insert or replace into models values (?, ?, ?, ?)""", # Fields ################################################## - def loadFields(self): - sql = "select * from fields where mid = ? order by ord" - self.fields = [Field(self.deck, data) - for data in self.deck.db.all(sql, self.id)] + def newField(self): + return defaultFieldConf.copy() def addField(self, field): self.deck.modSchema() - field.mid = self._getID() - field.ord = len(self.fields) self.fields.append(field) def fieldMap(self): - "Mapping of field name -> (fmid, ord)." - return dict([(f.name, (f.id, f.ord, f.conf)) for f in self.fields]) + "Mapping of field name -> (ord, conf)." 
+ return dict([(f['name'], (c, f)) for c, f in enumerate(self.fields)]) + + def sortField(self): + return 0 # Templates ################################################## @@ -101,65 +95,33 @@ insert or replace into models values (?, ?, ?, ?)""", new = Model(self.deck, self.id) new.id = None new.name += _(" copy") + new.fields = [f.copy() for f in self.fields] # get new id - f = new.fields; new.fields = [] t = new.templates; new.templates = [] new.flush() # then put back - new.fields = f new.templates = t - for f in new.fields: - f.id = None - f.mid = new.id - f._flush() for t in new.templates: t.id = None t.mid = new.id t._flush() return new -# Field model object +# Field object ########################################################################## defaultFieldConf = { - 'rtl': False, # features - 'required': False, - 'unique': False, + 'name': "", + 'rtl': False, + 'req': False, + 'uniq': False, 'font': "Arial", - 'quizSize': 20, - 'editSize': 20, - 'quizColour': "#fff", + 'qsize': 20, + 'esize': 20, + 'qcol': "#fff", 'pre': True, } -class Field(object): - - def __init__(self, deck, data=None): - self.deck = deck - if data: - self.initFromData(data) - else: - self.id = None - self.numeric = 0 - self.conf = defaultFieldConf.copy() - - def initFromData(self, data): - (self.id, - self.mid, - self.ord, - self.name, - self.numeric, - self.conf) = data - self.conf = simplejson.loads(self.conf) - - def _flush(self): - ret = self.deck.db.execute(""" -insert or replace into fields values (?, ?, ?, ?, ?, ?)""", - self.id, self.mid, self.ord, - self.name, self.numeric, - simplejson.dumps(self.conf)) - self.id = ret.lastrowid - # Template object ########################################################################## diff --git a/anki/stdmodels.py b/anki/stdmodels.py index e3658c784..74317e968 100644 --- a/anki/stdmodels.py +++ b/anki/stdmodels.py @@ -2,7 +2,7 @@ # Copyright: Damien Elmes # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html -from 
anki.models import Model, Template, Field +from anki.models import Model, Template from anki.lang import _ models = [] @@ -13,13 +13,13 @@ models = [] def BasicModel(deck): m = Model(deck) m.name = _("Basic") - fm = Field(deck) - fm.name = _("Front") - fm.conf['required'] = True - fm.conf['unique'] = True + fm = m.newField() + fm['name'] = _("Front") + fm['req'] = True + fm['uniq'] = True m.addField(fm) - fm = Field(deck) - fm.name = _("Back") + fm = m.newField() + fm['name'] = _("Back") m.addField(fm) t = Template(deck) t.name = _("Forward") diff --git a/anki/storage.py b/anki/storage.py index 758fdc6ab..457d46ab7 100644 --- a/anki/storage.py +++ b/anki/storage.py @@ -87,22 +87,17 @@ create table if not exists facts ( mid integer not null, crt integer not null, mod integer not null, - cache text not null + tags text not null, + flds text not null, + sfld text not null, + data text not null ); create table if not exists models ( id integer primary key, mod integer not null, name text not null, - conf text not null -); - -create table if not exists fields ( - id integer primary key, - mid integer not null, - ord integer not null, - name text not null, - numeric integer not null, + flds text not null, conf text not null ); @@ -117,14 +112,6 @@ create table if not exists templates ( conf text not null ); -create table if not exists fdata ( - fid integer not null, - fmid integer not null, - ord integer not null, - val text not null, - csum text not null -); - create table if not exists gconf ( id integer primary key, mod integer not null, @@ -190,9 +177,6 @@ create index if not exists ix_cards_mod on cards (mod); create index if not exists ix_facts_mod on facts (mod); -- card spacing, etc create index if not exists ix_cards_fid on cards (fid); --- fact data -create index if not exists ix_fdata_fid on fdata (fid); -create index if not exists ix_fdata_csum on fdata (csum); -- revlog by card create index if not exists ix_revlog_cid on revlog (cid); -- media @@ -204,10 
+188,16 @@ create index if not exists ix_media_csum on media (csum); # we don't have access to the progress handler at this point, so the GUI code # will need to set up a progress handling window before opening a deck. -def _moveTable(db, table, insExtra=""): +def _moveTable(db, table, cards=False): + if cards: + insExtra = " order by created" + else: + insExtra = "" sql = db.scalar( "select sql from sqlite_master where name = '%s'" % table) sql = sql.replace("TABLE "+table, "temporary table %s2" % table) + if cards: + sql = sql.replace("PRIMARY KEY (id),", "") db.execute(sql) db.execute("insert into %s2 select * from %s%s" % (table, table, insExtra)) db.execute("drop table "+table) @@ -244,7 +234,7 @@ def _upgradeSchema(db): # cards ########### # move into temp table - _moveTable(db, "cards", " order by created") + _moveTable(db, "cards", True) # use the new order to rewrite card ids map = dict(db.all("select id, rowid from cards2")) _insertWithIdChange(db, map, 0, "reviewHistory", 12) @@ -274,26 +264,36 @@ when trim(tags) == "" then "" else " " || replace(replace(trim(tags), ",", " "), " ", " ") || " " end) """) - # we store them as fields now - db.execute("insert into fields select null, id, 0, -1, tags from facts") - # put facts in a temporary table, sorted by created - db.execute(""" -create table facts2 -(id, modelId, created, modified, cache)""") - db.execute(""" -insert into facts2 select id, modelId, created, modified, spaceUntil + # pull facts into memory, so we can merge them with fields efficiently + facts = db.all(""" +select id, modelId, cast(created as int), cast(modified as int), tags from facts order by created""") - # use the new order to rewrite fact ids - map = dict(db.all("select id, rowid from facts2")) - _insertWithIdChange(db, map, 1, "fields", 5) + # build field hash + fields = {} + for (fid, ord, val) in db.execute( + "select factId, ordinal, value from fields order by factId, ordinal"): + if fid not in fields: + fields[fid] = [] + 
fields[fid].append((ord, val)) + # build insert data and transform ids, and minimize qt's + # bold/italics/underline cruft. + map = {} + data = [] + from anki.utils import minimizeHTML + for c, row in enumerate(facts): + oldid = row[0] + map[oldid] = c+1 + row = list(row) + row[0] = c+1 + row.append(minimizeHTML("\x1f".join([x[1] for x in sorted(fields[oldid])]))) + data.append(row) + # use the new order to rewrite fact ids in cards table _insertWithIdChange(db, map, 1, "cards", 18) # and put the facts into the new table db.execute("drop table facts") _addSchema(db, False) - db.execute(""" -insert or ignore into facts select rowid, modelId, -cast(created as int), cast(modified as int), cache from facts2""") - db.execute("drop table facts2") + db.executemany("insert into facts values (?,?,?,?,?,?,'','')", data) + db.execute("drop table fields") # media ########### @@ -303,19 +303,12 @@ insert or ignore into media select filename, cast(created as int), originalPath from media2""") db.execute("drop table media2") - # fields -> fdata - ########### - db.execute(""" -insert into fdata select factId, fieldModelId, ordinal, value, '' -from fields order by factId, ordinal""") - db.execute("drop table fields") - # models ########### _moveTable(db, "models") db.execute(""" insert into models select id, cast(modified as int), -name, "{}" from models2""") +name, "{}", "{}" from models2""") db.execute("drop table models2") # reviewHistory -> revlog @@ -333,8 +326,8 @@ cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int), # longer migrations ########### _migrateDeckTbl(db) - _migrateFieldsTbl(db) - _migrateTemplatesTbl(db) + mods = _migrateFieldsTbl(db) + _migrateTemplatesTbl(db, mods) _updateIndices(db) return ver @@ -385,32 +378,36 @@ utcOffset, "", "", "" from decks""", t=intTime()) def _migrateFieldsTbl(db): import anki.models - db.execute(""" -insert into fields select id, modelId, ordinal, name, numeric, '' -from fieldModels""") dconf = 
anki.models.defaultFieldConf + mods = {} for row in db.all(""" -select id, features, required, "unique", quizFontFamily, quizFontSize, -quizFontColour, editFontSize from fieldModels"""): +select id, modelId, ordinal, name, features, required, "unique", +quizFontFamily, quizFontSize, quizFontColour, editFontSize from fieldModels"""): conf = dconf.copy() - (conf['rtl'], - conf['required'], - conf['unique'], + if row[1] not in mods: + mods[row[1]] = [] + (conf['name'], + conf['rtl'], + conf['req'], + conf['uniq'], conf['font'], - conf['quizSize'], - conf['quizColour'], - conf['editSize']) = row[1:] + conf['qsize'], + conf['qcol'], + conf['esize']) = row[3:] # setup bools conf['rtl'] = not not conf['rtl'] conf['pre'] = True - # save - db.execute("update fields set conf = ? where id = ?", - simplejson.dumps(conf), row[0]) + # add to model list with ordinal for sorting + mods[row[1]].append((row[2], conf)) + # now we've gathered all the info, save it into the models + for mid, fms in mods.items(): + db.execute("update models set flds = ? where id = ?", + simplejson.dumps([x[1] for x in sorted(fms)]), mid) # clean up db.execute("drop table fieldModels") + return mods -def _migrateTemplatesTbl(db): - # do this after fieldModel migration +def _migrateTemplatesTbl(db, mods): import anki.models db.execute(""" insert into templates select id, modelId, ordinal, name, active, qformat, @@ -425,10 +422,11 @@ allowEmptyAnswer, typeAnswer from cardModels"""): conf['bg'], conf['allowEmptyAns'], fname) = row[2:] - # convert the field name to an id - conf['typeAnswer'] = db.scalar( - "select id from fields where name = ? and mid = ?", - fname, row[1]) + # convert the field name to an ordinal + for (ord, fm) in mods[row[1]]: + if fm['name'] == row[1]: + conf['typeAnswer'] = ord + break # save db.execute("update templates set conf = ? 
where id = ?", simplejson.dumps(conf), row[0]) @@ -440,7 +438,6 @@ def _rewriteModelIds(deck): models = deck.allModels() deck.db.execute("delete from models") deck.db.execute("delete from templates") - deck.db.execute("delete from fields") for c, m in enumerate(models): old = m.id m.id = c+1 @@ -451,13 +448,6 @@ def _rewriteModelIds(deck): t._flush() deck.db.execute( "update cards set tid = ? where tid = ?", t.mid, oldT) - for f in m.fields: - f.mid = m.id - oldF = f.id - f.id = None - f._flush() - deck.db.execute( - "update fdata set fmid = ? where fmid = ?", f.id, oldF) m.flush() deck.db.execute("update facts set mid = ? where mid = ?", m.id, old) @@ -470,20 +460,12 @@ def _postSchemaUpgrade(deck): "revCardsDue", "revCardsRandom", "acqCardsRandom", "acqCardsOld", "acqCardsNew"): deck.db.execute("drop view if exists %s" % v) - # minimize qt's bold/italics/underline cruft. we made need to use lxml to - # do this properly - from anki.utils import minimizeHTML - r = [(minimizeHTML(x[2]), x[0], x[1]) for x in deck.db.execute( - "select fid, fmid, val from fdata")] - deck.db.executemany("update fdata set val = ? where fid = ? 
and fmid = ?", - r) - # ensure all templates use the new style field format, and update cach + # ensure all templates use the new style field format for m in deck.allModels(): for t in m.templates: t.qfmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.qfmt) t.afmt = re.sub("%\((.+?)\)s", "{{\\1}}", t.afmt) m.flush() - m.updateCache() # remove stats, as it's all in the revlog now deck.db.execute("drop table if exists stats") # suspended cards don't use ranges anymore diff --git a/tests/test_deck.py b/tests/test_deck.py index b3eb6187e..22c5252ff 100644 --- a/tests/test_deck.py +++ b/tests/test_deck.py @@ -64,7 +64,7 @@ def test_factAddDelete(): assert not p # now let's make a duplicate and test uniqueness f2 = deck.newFact() - f2.model.fields[1].conf['required'] = True + f2.model.fields[1]['req'] = True f2['Front'] = u"one"; f2['Back'] = u"" p = f2.problems() assert p[0] == "unique" diff --git a/tests/test_models.py b/tests/test_models.py index a93abbcb7..2167a7f43 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,7 @@ # coding: utf-8 from tests.shared import getEmptyDeck -from anki.models import Model, Template, Field +from anki.models import Model, Template from anki.utils import stripHTML def test_modelDelete(): @@ -20,7 +20,6 @@ def test_modelCopy(): m2 = m.copy() assert m2.name == "Basic copy" assert m2.id != m.id - assert m2.fields[0].id != m.fields[0].id assert m2.templates[0].id != m.templates[0].id assert len(m2.fields) == 2 assert len(m.fields) == 2 @@ -29,24 +28,26 @@ def test_modelCopy(): assert len(m2.templates) == 2 def test_modelChange(): + print "model change" + return deck = getEmptyDeck() m2 = deck.currentModel() # taken from jp support plugin m1 = Model(deck) m1.name = "Japanese" # field 1 - fm = Field(deck) - fm.name = "Expression" - fm.conf['required'] = True - fm.conf['unique'] = True + fm = m1.newField() + fm['name'] = "Expression" + fm['req'] = True + fm['uniq'] = True m1.addField(fm) # field2 - fm = Field(deck) - fm.name = 
"Meaning" + fm = m1.newField() + fm['name'] = "Meaning" m1.addField(fm) # field3 - fm = Field(deck) - fm.name = "Reading" + fm = m1.newField() + fm['name'] = "Reading" m1.addField(fm) # template1 t = Template(deck)