# -*- coding: utf-8 -*-
# Copyright: Damien Elmes
# License: GNU AGPL, version 3 or later; http://www.gnu.org/copyleft/agpl.html

import os, time, simplejson, re, datetime, shutil
from anki.lang import _
from anki.utils import intTime, tmpfile, ids2str, splitFields, base91
from anki.db import DB
from anki.collection import _Collection
from anki.consts import *
from anki.storage import _addSchema, _getColVars, _addColVars, \
    _updateIndices

#
# Upgrading is the first step in migrating to 2.0.
# Caller should have called check() on path before calling upgrade().
#

class Upgrader(object):
    """Converts an old-format deck file into a 2.0 collection.

    Typical use: call check(path) first, and if it returns True call
    upgrade(path), which works on a temporary copy of the file and
    returns the opened collection.
    """

    def __init__(self):
        pass

    # Upgrading
    ######################################################################

    def upgrade(self, path):
        """Upgrade the deck at `path` and return the resulting collection.

        The file at `path` is copied to a temporary file first (see
        _openDB); all modifications are made to that copy.
        """
        self.path = path
        self._openDB(path)
        self._upgradeSchema()
        self._openCol()
        self._upgradeRest()
        return self.col

    # Integrity checking
    ######################################################################

    def check(self, path):
        "True if deck looks ok."
        with DB(path) as db:
            return self._check(db)

    def _check(self, db):
        """Run integrity checks on an open old-format deck.

        Returns True when every check passes, and None as soon as one
        fails (callers should test truthiness).
        """
        # corrupt?
        try:
            if db.scalar("pragma integrity_check") != "ok":
                return
        except Exception:
            # any database error while checking means the file is unusable
            return
        # old version?
        if db.scalar("select version from decks") != 65:
            return
        # ensure we have indices for checks below
        db.executescript("""
create index if not exists ix_cards_factId on cards (factId);
create index if not exists ix_fields_factId on fieldModels (factId);
analyze;""")
        # each query lists offending rows; any result means a broken deck
        problemQueries = (
            # fields missing a field model?
            """
select id from fields where fieldModelId not in (
select distinct id from fieldModels)""",
            # facts missing a field?
            """
select distinct facts.id from facts, fieldModels where
facts.modelId = fieldModels.modelId and fieldModels.id not in
(select fieldModelId from fields where factId = facts.id)""",
            # cards missing a fact?
            """
select id from cards where factId not in (select id from facts)""",
            # cards missing a card model?
            """
select id from cards where cardModelId not in
(select id from cardModels)""",
            # cards with a card model from the wrong model?
            """
select id from cards where cardModelId not in (select cm.id from
cardModels cm, facts f where cm.modelId = f.modelId and
f.id = cards.factId)""",
            # cards with the wrong ordinal?
            """
select c.id from cards c, cardModels cm
where c.cardModelId = cm.id and c.ordinal != cm.ordinal""",
            # facts missing a card?
            """
select facts.id from facts
where facts.id not in (select distinct factId from cards)""",
            # dangling fields?
            """
select id from fields where factId not in (select id from facts)""",
            # fields without matching ordinal
            """
select id from fields where ordinal != (select ordinal from fieldModels
where id = fieldModelId)""",
            # incorrect types
            """
select id from cards where relativeDelay != (case
when successive then 1 when reps then 0 else 2 end)""",
            """
select id from cards where type != (case
when type >= 0 then relativeDelay else relativeDelay - 3 end)""",
        )
        for query in problemQueries:
            if db.list(query):
                return
        return True

    # DB/Deck opening
    ######################################################################

    def _openDB(self, path):
        "Copy the deck to a temporary .anki2 file and open the copy."
        self.tmppath = tmpfile(suffix=".anki2")
        shutil.copy(path, self.tmppath)
        self.db = DB(self.tmppath)

    def _openCol(self):
        "Wrap the (already schema-upgraded) DB in a collection object."
        self.col = _Collection(self.db)

    # Schema upgrade
    ######################################################################

    def _upgradeSchema(self):
        "Alter tables prior to ORM initialization."
        db = self.db
        # speed up the upgrade
        db.execute("pragma temp_store = memory")
        db.execute("pragma cache_size = 10000")
        db.execute("pragma synchronous = off")
        # these weren't always correctly set
        db.execute("pragma page_size = 4096")
        db.execute("pragma legacy_file_format = 0")

        # notes
        ###########
        # tags should have a leading and trailing space if not empty, and not
        # use commas. replacing a comma with a space can leave a doubled
        # space behind, so doubled spaces are collapsed afterwards.
        db.execute("""
update facts set tags = (case
when trim(tags) == "" then ""
else " " || replace(replace(trim(tags), ",", " "), "  ", " ") || " "
end)
""")
        # pull facts into memory, so we can merge them with fields efficiently
        facts = db.all("""
select id, id, modelId, 1, cast(created*1000 as int), cast(modified as int),
0, tags from facts order by created""")
        # build field hash: fact id -> [(ordinal, value), ...]
        fields = {}
        for (fid, ordinal, val) in db.execute(
            "select factId, ordinal, value from fields order by factId, ordinal"):
            fields.setdefault(fid, []).append(
                (ordinal, self._mungeField(val)))
        # build insert data and transform ids, and minimize qt's
        # bold/italics/underline cruft.
        data = []
        factidmap = {}
        times = set()
        from anki.utils import minimizeHTML
        for row in facts:
            row = list(row)
            oldid = row[0]
            # the creation time (ms) becomes the new id, so it must be
            # unique; bump by a second until it is
            while row[4] in times:
                row[4] += 1000
            times.add(row[4])
            factidmap[oldid] = row[4]
            # get rid of old created column and update id
            row[0] = row[4]
            del row[4]
            # convert old 64bit id into a string, discarding sign bit
            row[1] = base91(abs(row[1]))
            row.append(minimizeHTML("\x1f".join(
                [x[1] for x in sorted(fields[oldid])])))
            data.append(row)
        # and put the facts into the new table
        db.execute("drop table facts")
        _addSchema(db, False)
        db.executemany(
            "insert into notes values (?,?,?,?,?,?,?,?,'','',0,'')", data)
        db.execute("drop table fields")

        # cards
        ###########
        # we need to pull this into memory, to rewrite the creation time if
        # it's not unique and update the fact id
        times = set()
        rows = []
        cardidmap = {}
        for row in db.execute("""
select id, cast(created*1000 as int), factId, ordinal,
cast(modified as int), 0,
(case relativeDelay
when 0 then 1
when 1 then 2
when 2 then 0 end),
(case type
when 0 then 1
when 1 then 2
when 2 then 0
else type end),
cast(due as int), cast(interval as int),
cast(factor*1000 as int), reps, noCount from cards
order by created"""):
            row = list(row)
            # find an unused time
            while row[1] in times:
                row[1] += 1000
            times.add(row[1])
            # rewrite fact id
            row[2] = factidmap[row[2]]
            # note id change and save all but old id
            cardidmap[row[0]] = row[1]
            rows.append(row[1:])
        # drop old table and rewrite
        db.execute("drop table cards")
        _addSchema(db, False)
        db.executemany("""
insert into cards values (?,?,1,?,?,?,?,?,?,?,?,?,?,0,0,0,"")""",
                       rows)

        # reviewHistory -> revlog
        ###########
        # fetch the data so we can rewrite ids quickly
        revlog = []
        for row in db.execute("""
select
cast(time*1000 as int), cardId, 0, ease,
cast(nextInterval as int), cast(lastInterval as int),
cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int),
yesCount from reviewHistory"""):
            row = list(row)
            # new card ids; skip history for cards that no longer exist
            if row[1] not in cardidmap:
                continue
            row[1] = cardidmap[row[1]]
            # no ease 0 anymore
            row[3] = row[3] or 1
            # determine type, overwriting yesCount
            oldInt = row[5]
            yesCnt = row[8]
            # yesCnt included the current answer
            if row[3] > 1:
                yesCnt -= 1
            if oldInt < 1:
                # new or failed
                if yesCnt:
                    # type=relrn
                    row[8] = 2
                else:
                    # type=lrn
                    row[8] = 0
            else:
                # type=rev
                row[8] = 1
            revlog.append(row)
        db.executemany(
            "insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)", revlog)
        db.execute("drop table reviewHistory")

        # deck
        ###########
        self._migrateDeckTbl()

        # tags
        ###########
        tags = {}
        for t in db.list("select tag from tags"):
            tags[t] = intTime()
        db.execute("update col set tags = ?", simplejson.dumps(tags))
        db.execute("drop table tags")
        db.execute("drop table cardTags")

        # the rest
        ###########
        db.execute("drop table media")
        db.execute("drop table sources")
        self._migrateModels()
        _updateIndices(db)

    def _migrateDeckTbl(self):
        "Move the old decks/deckVars tables into the new col table."
        db = self.db
        db.execute("delete from col")
        db.execute("""
insert or replace into col select id, cast(created as int), :t,
:t, 99, 0, 0, cast(lastSync as int),
"", "", "", "", "" from decks""", t=intTime())
        # prepare a deck to store the old deck options
        g, gc, conf = _getColVars(db)
        # delete old selective study settings, which we can't auto-upgrade
        # easily
        keys = ("newActive", "newInactive", "revActive", "revInactive")
        for k in keys:
            db.execute("delete from deckVars where key=:k", k=k)
        # copy other settings, ignoring deck order as there's a new default
        gc['new']['perDay'] = db.scalar("select newCardsPerDay from decks")
        gc['new']['order'] = min(1, db.scalar(
            "select newCardOrder from decks"))
        # these are collection level, and can't be imported on a per-deck
        # basis
        # conf['newSpread'] = db.scalar("select newCardSpacing from decks")
        # conf['timeLim'] = db.scalar("select sessionTimeLimit from decks")
        # add any deck vars and save; the cache entries are not carried over
        dkeys = ("hexCache", "cssCache")
        for (k, v) in db.execute("select * from deckVars").fetchall():
            if k not in dkeys:
                conf[k] = v
        _addColVars(db, g, gc, conf)
        # clean up
        db.execute("drop table decks")
        db.execute("drop table deckVars")

    def _migrateModels(self):
        """Convert the models/fieldModels/cardModels tables into the JSON
        model dict stored on the col table, and repoint notes at the new
        model ids."""
        import anki.models
        db = self.db
        times = set()
        mods = {}
        for row in db.all(
            "select id, name from models"):
            # new model ids are millisecond timestamps; spin until we get
            # one we haven't handed out yet
            while 1:
                t = intTime(1000)
                if t not in times:
                    times.add(t)
                    break
            m = anki.models.defaultModel.copy()
            m['id'] = t
            m['name'] = row[1]
            m['mod'] = intTime()
            m['tags'] = []
            m['flds'] = self._fieldsForModel(row[0])
            m['tmpls'] = self._templatesForModel(row[0], m['flds'])
            mods[m['id']] = m
            db.execute("update notes set mid = ? where mid = ?", t, row[0])
        # save and clean up
        db.execute("update col set models = ?", simplejson.dumps(mods))
        db.execute("drop table fieldModels")
        db.execute("drop table cardModels")
        db.execute("drop table models")

    def _fieldsForModel(self, mid):
        """Return a list of new-style field dicts for old model `mid`,
        ordered by the old ordinal."""
        import anki.models
        db = self.db
        dconf = anki.models.defaultField
        flds = []
        # note: qsize & qcol are used in upgrade then discarded
        for c, row in enumerate(db.all("""
select name, features, quizFontFamily, quizFontSize, quizFontColour,
editFontSize from fieldModels where modelId = ?
order by ordinal""", mid)):
            conf = dconf.copy()
            (conf['name'],
             conf['rtl'],
             conf['font'],
             conf['qsize'],
             conf['qcol'],
             conf['size']) = row
            conf['ord'] = c
            # ensure data is good
            conf['rtl'] = not not conf['rtl']
            conf['font'] = conf['font'] or "Arial"
            conf['size'] = 12
            # will be removed later in upgrade
            conf['qcol'] = conf['qcol'] or "#000"
            conf['qsize'] = conf['qsize'] or 20
            flds.append(conf)
        return flds

    def _templatesForModel(self, mid, flds):
        """Return a list of new-style template dicts for old model `mid`,
        ordered by the old ordinal. `flds` is accepted but not used here."""
        import anki.models
        db = self.db
        dconf = anki.models.defaultTemplate
        tmpls = []
        for c, row in enumerate(db.all("""
select name, active, qformat, aformat, questionInAnswer,
questionAlign, lastFontColour, typeAnswer from cardModels
where modelId = ?
order by ordinal""", mid)):
            conf = dconf.copy()
            (conf['name'],
             conf['actv'],
             conf['qfmt'],
             conf['afmt'],
             # the following are used in upgrade then discarded
             hideq,
             conf['align'],
             conf['bg'],
             typeAns) = row
            conf['ord'] = c
            for fmt in ("qfmt", "afmt"):
                # ensure the new style field format
                conf[fmt] = re.sub(r"%\((.+?)\)s", r"{{\1}}", conf[fmt])
                # some special names have changed
                conf[fmt] = re.sub(
                    "(?i){{tags}}", "{{Tags}}", conf[fmt])
                conf[fmt] = re.sub(
                    "(?i){{cardModel}}", "{{Card}}", conf[fmt])
                conf[fmt] = re.sub(
                    "(?i){{modelTags}}", "{{Type}}", conf[fmt])
                # type answer is now embedded in the format
                if typeAns:
                    if fmt == "qfmt" or hideq:
                        conf[fmt] += '<br>{{type:%s}}' % typeAns
            # q fields now in a
            if not hideq:
                conf['afmt'] = (
                    conf['qfmt'] + "\n\n<hr id=answer>\n\n" + conf['afmt'])
            tmpls.append(conf)
        return tmpls

    # Field munging
    ######################################################################

    def _mungeField(self, val):
        "Return `val` with runs of spaces made non-breaking."
        # we no longer wrap fields in white-space: pre-wrap, so we need to
        # convert previous whitespace into non-breaking spaces. only runs
        # of two or more spaces are touched: a lone space renders fine in
        # HTML and must stay breakable so text can still wrap.
        def repl(match):
            return match.group(1).replace(" ", "&nbsp;")
        return re.sub("( {2,})", repl, val)

    # Template upgrading
    ######################################################################
    # - {{field}} no longer inserts an implicit span, so we make the span
    #   explicit on upgrade.
    # - likewise with alignment and background color

    def _upgradeTemplates(self):
        """Make per-field styling explicit in each template, fold the old
        background/alignment settings into the template css, and drop the
        obsolete qcol/qsize/bg/align keys."""
        d = self.col
        for m in d.models.all():
            # cache field styles
            styles = {}
            for f in m['flds']:
                attrs = []
                if f['font'].lower() != 'arial':
                    attrs.append("font-family: %s" % f['font'])
                if f['qsize'] != 20:
                    attrs.append("font-size: %spx" % f['qsize'])
                if f['qcol'] not in ("black", "#000"):
                    attrs.append("color: %s" % f['qcol'])
                if f['rtl']:
                    attrs.append("direction: rtl; unicode-bidi: embed")
                if attrs:
                    # wrap the field reference in an explicit styled span
                    styles[f['name']] = '<span style="%s">{{%s}}</span>' % (
                        "; ".join(attrs), f['name'])
                # obsolete
                del f['qcol']
                del f['qsize']

            def repl(match):
                field = match.group(2)
                if field in styles:
                    return match.group(1) + styles[field]
                # special or non-existent field; leave alone
                return match.group(0)

            # then for each template
            for t in m['tmpls']:
                for k in 'qfmt', 'afmt':
                    # replace old field references
                    t[k] = re.sub("(^|[^{]){{([^{}]+)?}}", repl, t[k])
                    # then strip extra {}s from other fields
                    t[k] = t[k].replace("{{{", "{{").replace("}}}", "}}")
                    # remove superfluous formatting from 1.0 -> 1.2 upgrade
                    t[k] = re.sub("font-size: ?20px;?", "", t[k])
                    t[k] = re.sub("(?i)font-family: ?arial;?", "", t[k])
                    t[k] = re.sub("color: ?#000(000)?;?", "", t[k])
                    t[k] = re.sub("white-space: ?pre-wrap;?", "", t[k])
                    # spans whose style ended up empty are just noise
                    t[k] = re.sub(
                        '<span style=" *">(.+?)</span>', '\\1', t[k])
                # adjust css
                if t['bg'].lower() == "#ffffff":
                    # a bit more intuitive default
                    bg = "white"
                else:
                    bg = t['bg']
                t['css'] = t['css'].replace("white", bg).replace(
                    "center", ("center", "left", "right")[t['align']])
                # remove obsolete
                del t['bg']
                del t['align']
            # save model
            d.models.save(m)

    # Media references
    ######################################################################
    # In 2.0 we drop support for media and latex references in the template,
    # since they require generating card templates to see what media a note
    # uses, and are confusing for shared deck users. To ease the upgrade
    # process, we automatically convert the references to new fields.

    def _rewriteMediaRefs(self):
        """Replace media/latex references that embed a field in a template
        with a reference to a new "Media N" field holding the expanded
        reference for each note."""
        col = self.col

        def rewriteRef(m, t, key, match, state):
            # match is (whole reference, inner text) as returned by findall
            whole, fname = match
            if whole in state['mflds']:
                # we've converted this field before
                new = state['mflds'][whole]
            else:
                # get field name and any prefix/suffix
                m2 = re.match(
                    r"([^{]*)\{\{\{?(?:text:)?([^}]+)\}\}\}?(.*)",
                    fname)
                # not a field reference?
                if not m2:
                    return
                pre, ofld, suf = m2.groups()
                # get index of field name
                try:
                    idx = col.models.fieldMap(m)[ofld][0]
                except LookupError:
                    # invalid field or tag reference; don't rewrite
                    return
                # find a free field name
                while 1:
                    state['fields'] += 1
                    fld = "Media %d" % state['fields']
                    if fld not in col.models.fieldMap(m).keys():
                        break
                # add the new field
                f = col.models.newField(fld)
                f['qsize'] = 20
                f['qcol'] = '#000'
                col.models.addField(m, f)
                # loop through notes and write reference into new field
                # NOTE(review): relies on addField having appended an
                # empty trailing field to each note's flds - confirm
                data = []
                for nid, flds in self.col.db.execute(
                    "select id, flds from notes where id in "+
                    ids2str(col.models.nids(m))):
                    sflds = splitFields(flds)
                    ref = whole.replace(fname, pre+sflds[idx]+suf)
                    data.append((flds+ref, nid))
                # update notes
                col.db.executemany("update notes set flds=? where id=?",
                                   data)
                # note field for future; keyed the same way as the lookup
                # above so a repeated reference reuses this field
                state['mflds'][whole] = fld
                new = fld
            # rewrite reference in template
            t[key] = t[key].replace(whole, "{{{%s}}}" % new)

        regexps = col.media.regexps + (
            r"(\[latex\](.+?)\[/latex\])",
            r"(\[\$\](.+?)\[/\$\])",
            r"(\[\$\$\](.+?)\[/\$\$\])")
        # process each model
        for m in col.models.all():
            state = dict(mflds={}, fields=0)
            for t in m['tmpls']:
                for r in regexps:
                    for match in re.findall(r, t['qfmt']):
                        rewriteRef(m, t, 'qfmt', match, state)
                    for match in re.findall(r, t['afmt']):
                        rewriteRef(m, t, 'afmt', match, state)
            if state['fields']:
                col.models.save(m)

    # Inactive templates
    ######################################################################
    # Templates can't be declared as inactive anymore. Remove any that are
    # marked inactive and have no dependent cards.

    def _removeInactive(self):
        """Remove inactive templates that have no cards, and drop the
        obsolete 'actv' key from every template."""
        d = self.col
        for m in d.models.all():
            remove = []
            for t in m['tmpls']:
                if not t['actv']:
                    if not d.db.scalar("""
select 1 from cards where nid in (select id from notes where mid = ?)
and ord = ? limit 1""", m['id'], t['ord']):
                        remove.append(t)
                del t['actv']
            for r in remove:
                d.models.remTemplate(m, r)
            d.models.save(m)

    # Conditional templates
    ######################################################################
    # For models that don't use a given template in all cards, we'll need to
    # add a new field to notes to indicate if the card should be generated or
    # not

    def _addFlagFields(self):
        "Add flag fields for every selectively used template of each model."
        for m in self.col.models.all():
            nids = self.col.models.nids(m)
            changed = False
            for tmpl in m['tmpls']:
                if self._addFlagFieldsForTemplate(m, nids, tmpl):
                    changed = True
            if changed:
                # save model
                self.col.models.save(m, templates=True)

    def _addFlagFieldsForTemplate(self, m, nids, tmpl):
        """Add a flag field for `tmpl` if not every note in `nids` has a
        card for it.

        Returns True if the model was changed, None otherwise.
        """
        cids = self.col.db.list(
            "select id from cards where nid in %s and ord = ?" %
            ids2str(nids), tmpl['ord'])
        if len(cids) == len(nids):
            # not selectively used
            return
        # add a flag field, named after the template but made unique
        name = tmpl['name']
        have = [f['name'] for f in m['flds']]
        while name in have:
            name += "_"
        f = self.col.models.newField(name)
        self.col.models.addField(m, f)
        # find the notes that have that card
        haveNids = self.col.db.list(
            "select nid from cards where id in "+ids2str(cids))
        # add "y" to the appended field for those notes
        self.col.db.execute(
            "update notes set flds = flds || 'y' where id in "+ids2str(
                haveNids))
        # wrap the template in a conditional
        tmpl['qfmt'] = "{{#%s}}\n%s\n{{/%s}}" % (
            f['name'], tmpl['qfmt'], f['name'])
        return True

    # New due times
    ######################################################################
    # New cards now use a user-friendly increasing integer rather than a
    # timestamp

    def _rewriteNewDue(self):
        """Replace the due value of new cards with an increasing position;
        consecutive rows sharing the same old due share a position."""
        col = self.col
        pos = 0
        lastDue = None
        data = []
        for cid, due in col.db.execute(
            "select id, due from cards where type = 0"):
            if due != lastDue:
                pos += 1
                lastDue = due
            data.append((pos, cid))
        col.db.executemany(
            "update cards set due = ? where id = ?", data)

    # Post-schema upgrade
    ######################################################################

    def _upgradeRest(self):
        "Handle the rest of the upgrade to 2.0."
        col = self.col
        # make sure we have a current model id
        col.models.setCurrent(list(col.models.models.values())[0])
        # remove unused templates that were marked inactive
        self._removeInactive()
        # rewrite media references in card template
        self._rewriteMediaRefs()
        # template handling has changed
        self._upgradeTemplates()
        # add fields for selectively used templates
        self._addFlagFields()
        # fix creation time
        col.sched._updateCutoff()
        d = datetime.datetime.today()
        d -= datetime.timedelta(hours=4)
        d = datetime.datetime(d.year, d.month, d.day)
        d += datetime.timedelta(hours=4)
        d -= datetime.timedelta(days=1+int((time.time()-col.crt)/86400))
        col.crt = int(time.mktime(d.timetuple()))
        col.sched._updateCutoff()
        # update uniq cache
        col.updateFieldCache(col.db.list("select id from notes"))
        # remove old views
        for v in ("failedCards", "revCardsOld", "revCardsNew",
                  "revCardsDue", "revCardsRandom", "acqCardsRandom",
                  "acqCardsOld", "acqCardsNew"):
            col.db.execute("drop view if exists %s" % v)
        # remove stats, as it's all in the revlog now
        col.db.execute("drop table if exists stats")
        # suspended cards don't use ranges anymore
        col.db.execute(
            "update cards set queue=-1 where queue between -3 and -1")
        col.db.execute(
            "update cards set queue=-2 where queue between 3 and 5")
        col.db.execute(
            "update cards set queue=-3 where queue between 6 and 8")
        # remove old deleted tables
        for t in ("cards", "notes", "models", "media"):
            col.db.execute("drop table if exists %sDeleted" % t)
        # rewrite due times for new cards
        self._rewriteNewDue()
        # and failed cards
        left = len(col.decks.confForDid(1)['new']['delays'])
        col.db.execute(
            "update cards set edue = ?, left=? where type = 1",
            col.sched.today+1, left)
        # and due cards
        col.db.execute("""
update cards set due = cast(
(case when due < :stamp then 0 else 1 end) +
((due-:stamp)/86400) as int)+:today where type = 2
""", stamp=col.sched.dayCutoff, today=col.sched.today)
        # possibly re-randomize
        conf = col.decks.allConf()[0]
        if not conf['new']['order']:
            col.sched.randomizeCards(1)
        # update insertion id
        col.conf['nextPos'] = (
            col.db.scalar("select max(id) from notes") or 0)+1
        col.save()

        # optimize and finish
        col.db.commit()
        col.db.execute("vacuum")
        col.db.execute("analyze")
        col.db.execute("update col set ver = ?", SCHEMA_VERSION)
        col.save()