Anki/anki/upgrade.py
Damien Elmes 15e4b64162 new approach to hideQ
Instead of a separate option to hide question, embed the question into the
answer format by default. Users who don't want to see the question can remove
the question fields, and users who want a separator between the question and
answer (or not) can control it in HTML now.

Also, remove obsolete field CSS, and don't accidentally chomp a character on
upgrade.
2011-11-18 05:15:15 +09:00

615 lines
22 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/copyleft/agpl.html
import os, time, simplejson, re, datetime, shutil
from anki.lang import _
from anki.utils import intTime, tmpfile, ids2str, splitFields
from anki.db import DB
from anki.deck import _Deck
from anki.consts import *
from anki.storage import _addSchema, _getDeckVars, _addDeckVars, \
_updateIndices
#
# Upgrading is the first step in migrating to 2.0. The ids are temporary and
# may not be unique across multiple decks. After each of a user's v1.2 decks
# are upgraded, they need to be merged via the import code.
#
# Caller should have called check() on path before calling upgrade().
#
class Upgrader(object):
def __init__(self):
pass
# Upgrading
######################################################################
def upgrade(self, path):
self.path = path
self._openDB(path)
self._upgradeSchema()
self._openDeck()
self._upgradeDeck()
return self.deck
# Integrity checking
######################################################################
def check(self, path):
"True if deck looks ok."
with DB(path) as db:
return self._check(db)
def _check(self, db):
# corrupt?
try:
if db.scalar("pragma integrity_check") != "ok":
return
except:
return
# old version?
if db.scalar("select version from decks") != 65:
return
# ensure we have indices for checks below
db.executescript("""
create index if not exists ix_cards_factId on cards (factId);
create index if not exists ix_fields_factId on fieldModels (factId);
analyze;""")
# fields missing a field model?
if db.list("""
select id from fields where fieldModelId not in (
select distinct id from fieldModels)"""):
return
# facts missing a field?
if db.list("""
select distinct facts.id from facts, fieldModels where
facts.modelId = fieldModels.modelId and fieldModels.id not in
(select fieldModelId from fields where factId = facts.id)"""):
return
# cards missing a fact?
if db.list("""
select id from cards where factId not in (select id from facts)"""):
return
# cards missing a card model?
if db.list("""
select id from cards where cardModelId not in
(select id from cardModels)"""):
return
# cards with a card model from the wrong model?
if db.list("""
select id from cards where cardModelId not in (select cm.id from
cardModels cm, facts f where cm.modelId = f.modelId and
f.id = cards.factId)"""):
return
# facts missing a card?
if db.list("""
select facts.id from facts
where facts.id not in (select distinct factId from cards)"""):
return
# dangling fields?
if db.list("""
select id from fields where factId not in (select id from facts)"""):
return
# fields without matching interval
if db.list("""
select id from fields where ordinal != (select ordinal from fieldModels
where id = fieldModelId)"""):
return
# incorrect types
if db.list("""
select id from cards where relativeDelay != (case
when successive then 1 when reps then 0 else 2 end)"""):
return
if db.list("""
select id from cards where type != (case
when type >= 0 then relativeDelay else relativeDelay - 3 end)"""):
return
return True
# DB/Deck opening
######################################################################
def _openDB(self, path):
self.tmppath = tmpfile(suffix=".anki2")
shutil.copy(path, self.tmppath)
self.db = DB(self.tmppath)
def _openDeck(self):
self.deck = _Deck(self.db)
# Schema upgrade
######################################################################
def _upgradeSchema(self):
"Alter tables prior to ORM initialization."
db = self.db
# speed up the upgrade
db.execute("pragma temp_store = memory")
db.execute("pragma cache_size = 10000")
# these weren't always correctly set
db.execute("pragma page_size = 4096")
db.execute("pragma legacy_file_format = 0")
# facts
###########
# tags should have a leading and trailing space if not empty, and not
# use commas
db.execute("""
update facts set tags = (case
when trim(tags) == "" then ""
else " " || replace(replace(trim(tags), ",", " "), " ", " ") || " "
end)
""")
# pull facts into memory, so we can merge them with fields efficiently
facts = db.all("""
select id, id, modelId, 1, cast(created*1000 as int), cast(modified as int),
0, tags from facts order by created""")
# build field hash
fields = {}
for (fid, ord, val) in db.execute(
"select factId, ordinal, value from fields order by factId, ordinal"):
if fid not in fields:
fields[fid] = []
fields[fid].append((ord, val))
# build insert data and transform ids, and minimize qt's
# bold/italics/underline cruft.
map = {}
data = []
factidmap = {}
times = {}
from anki.utils import minimizeHTML
for c, row in enumerate(facts):
oldid = row[0]
row = list(row)
# get rid of old created column and update id
while row[4] in times:
row[4] += 1000
times[row[4]] = True
factidmap[row[0]] = row[4]
row[0] = row[4]
del row[4]
map[oldid] = row[0]
row.append(minimizeHTML("\x1f".join([x[1] for x in sorted(fields[oldid])])))
data.append(row)
# and put the facts into the new table
db.execute("drop table facts")
_addSchema(db, False)
db.executemany("insert into facts values (?,?,?,?,?,?,?,?,'',0,'')", data)
db.execute("drop table fields")
# cards
###########
# we need to pull this into memory, to rewrite the creation time if
# it's not unique and update the fact id
times = {}
rows = []
cardidmap = {}
for row in db.execute("""
select id, cast(created*1000 as int), factId, ordinal,
cast(modified as int), 0,
(case relativeDelay
when 0 then 1
when 1 then 2
when 2 then 0 end),
(case type
when 0 then 1
when 1 then 2
when 2 then 0
else type end),
cast(due as int), cast(interval as int),
cast(factor*1000 as int), reps, noCount from cards
order by created"""):
# find an unused time
row = list(row)
while row[1] in times:
row[1] += 1000
times[row[1]] = True
# rewrite fact id
row[2] = factidmap[row[2]]
# note id change and save all but old id
cardidmap[row[0]] = row[1]
rows.append(row[1:])
# drop old table and rewrite
db.execute("drop table cards")
_addSchema(db, False)
db.executemany("""
insert into cards values (?,?,1,?,?,?,?,?,?,?,?,?,?,0,0,0,"")""",
rows)
# reviewHistory -> revlog
###########
# fetch the data so we can rewrite ids quickly
r = []
for row in db.execute("""
select
cast(time*1000 as int), cardId, 0, ease,
cast(nextInterval as int), cast(lastInterval as int),
cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int),
yesCount from reviewHistory"""):
row = list(row)
# new card ids
try:
row[1] = cardidmap[row[1]]
except:
# id doesn't exist
continue
# no ease 0 anymore
row[2] = row[2] or 1
# determine type, overwriting yesCount
newInt = row[3]
oldInt = row[4]
yesCnt = row[7]
# yesCnt included the current answer
if row[2] > 1:
yesCnt -= 1
if oldInt < 1:
# new or failed
if yesCnt:
# type=relrn
row[7] = 2
else:
# type=lrn
row[7] = 0
else:
# type=rev
row[7] = 1
r.append(row)
db.executemany(
"insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)", r)
db.execute("drop table reviewHistory")
# deck
###########
self._migrateDeckTbl()
# tags
###########
tags = {}
for t in db.list("select tag from tags"):
tags[t] = intTime()
db.execute("update deck set tags = ?", simplejson.dumps(tags))
db.execute("drop table tags")
db.execute("drop table cardTags")
# the rest
###########
db.execute("drop table media")
db.execute("drop table sources")
self._migrateModels()
_updateIndices(db)
def _migrateDeckTbl(self):
import anki.deck
db = self.db
db.execute("delete from deck")
db.execute("""
insert or replace into deck select id, cast(created as int), :t,
:t, 99, 0, 0, cast(lastSync as int),
"", "", "", "", "" from decks""", t=intTime())
# prepare a group to store the old deck options
g, gc, conf = _getDeckVars(db)
# delete old selective study settings, which we can't auto-upgrade easily
keys = ("newActive", "newInactive", "revActive", "revInactive")
for k in keys:
db.execute("delete from deckVars where key=:k", k=k)
# copy other settings, ignoring deck order as there's a new default
g['newSpread'] = db.scalar("select newCardSpacing from decks")
g['newPerDay'] = db.scalar("select newCardsPerDay from decks")
g['repLim'] = db.scalar("select sessionRepLimit from decks")
g['timeLim'] = db.scalar("select sessionTimeLimit from decks")
# this needs to be placed in the model later on
conf['oldNewOrder'] = db.scalar("select newCardOrder from decks")
# no reverse option anymore
conf['oldNewOrder'] = min(1, conf['oldNewOrder'])
# add any deck vars and save
dkeys = ("hexCache", "cssCache")
for (k, v) in db.execute("select * from deckVars").fetchall():
if k in dkeys:
pass
else:
conf[k] = v
_addDeckVars(db, g, gc, conf)
# clean up
db.execute("drop table decks")
db.execute("drop table deckVars")
def _migrateModels(self):
import anki.models
db = self.db
times = {}
mods = {}
for row in db.all(
"select id, name from models"):
while 1:
t = intTime(1000)
if t not in times:
times[t] = True
break
m = anki.models.defaultModel.copy()
m['id'] = t
m['name'] = row[1]
m['mod'] = intTime()
m['tags'] = []
m['flds'] = self._fieldsForModel(row[0])
m['tmpls'] = self._templatesForModel(row[0], m['flds'])
mods[m['id']] = m
db.execute("update facts set mid = ? where mid = ?", t, row[0])
# save and clean up
db.execute("update deck set models = ?", simplejson.dumps(mods))
db.execute("drop table fieldModels")
db.execute("drop table cardModels")
db.execute("drop table models")
def _fieldsForModel(self, mid):
import anki.models
db = self.db
dconf = anki.models.defaultField
flds = []
for c, row in enumerate(db.all("""
select name, features, required, "unique",
quizFontFamily, quizFontSize, quizFontColour, editFontSize from fieldModels
where modelId = ?
order by ordinal""", mid)):
conf = dconf.copy()
(conf['name'],
conf['rtl'],
conf['req'],
conf['uniq'],
conf['font'],
conf['qsize'],
conf['qcol'],
conf['esize']) = row
conf['ord'] = c
# ensure data is good
conf['rtl'] = not not conf['rtl']
conf['pre'] = True
conf['font'] = conf['font'] or "Arial"
conf['qcol'] = conf['qcol'] or "#000"
conf['qsize'] = conf['qsize'] or 20
conf['esize'] = conf['esize'] or 20
flds.append(conf)
return flds
def _templatesForModel(self, mid, flds):
import anki.models
db = self.db
dconf = anki.models.defaultTemplate
tmpls = []
for c, row in enumerate(db.all("""
select name, active, qformat, aformat, questionInAnswer,
questionAlign, lastFontColour, typeAnswer from cardModels
where modelId = ?
order by ordinal""", mid)):
conf = dconf.copy()
(conf['name'],
conf['actv'],
conf['qfmt'],
conf['afmt'],
hideq,
conf['align'],
conf['bg'],
conf['typeAns']) = row
conf['ord'] = c
# q fields now in a
if not hideq:
conf['afmt'] = conf['qfmt'] + "\n\n<hr>\n\n" + conf['afmt']
# convert the field name to an ordinal
ordN = None
for (ord, fm) in enumerate(flds):
if fm['name'] == conf['typeAns']:
ordN = ord
break
if ordN is not None:
conf['typeAns'] = ordN
else:
conf['typeAns'] = None
for type in ("qfmt", "afmt"):
# ensure the new style field format
conf[type] = re.sub("%\((.+?)\)s", "{{\\1}}", conf[type])
# some special names have changed
conf[type] = re.sub(
"(?i){{tags}}", "{{Tags}}", conf[type])
conf[type] = re.sub(
"(?i){{cardModel}}", "{{Template}}", conf[type])
conf[type] = re.sub(
"(?i){{modelTags}}", "{{Model}}", conf[type])
tmpls.append(conf)
return tmpls
# Template upgrading
######################################################################
# {{field}} no longer inserts an implicit span, so we make the span
# explicit on upgrade.
def _upgradeTemplates(self):
d = self.deck
for m in d.models.all():
# cache field styles
styles = {}
for f in m['flds']:
attrs = [
"font-family:%s" % f['font'],
"font-size:%spx" % f['qsize'],
"color:%s" % f['qcol']]
if f['rtl']:
attrs.append("direction:rtl;unicode-bidi:embed")
if f['pre']:
attrs.append("white-space:pre-wrap")
styles[f['name']] = '<span style="%s">\n{{%s}}\n</span>' % (
";".join(attrs), f['name'])
# then for each template
for t in m['tmpls']:
def repl(match):
field = match.group(2)
if field in styles:
return match.group(1) + styles[field]
# special or non-existant field; leave alone
return match.group(0)
for k in 'qfmt', 'afmt':
t[k] = re.sub("(^|[^{]){{([^{}]+)?}}", repl, t[k])
# save model
d.models.save(m)
# Media references
######################################################################
# In 2.0 we drop support for media and latex references in the template,
# since they require generating card templates to see what media a fact
# uses, and are confusing for shared deck users. To ease the upgrade
# process, we automatically convert the references to new fields.
def _rewriteMediaRefs(self):
deck = self.deck
def rewriteRef(key):
all, fname = match
if all in state['mflds']:
# we've converted this field before
new = state['mflds'][all]
else:
# get field name and any prefix/suffix
m2 = re.match(
"([^{]*)\{\{\{?(?:text:)?([^}]+)\}\}\}?(.*)",
fname)
# not a field reference?
if not m2:
return
pre, ofld, suf = m2.groups()
# get index of field name
try:
idx = deck.models.fieldMap(m)[ofld][0]
except:
# invalid field or tag reference; don't rewrite
return
# find a free field name
while 1:
state['fields'] += 1
fld = "Media %d" % state['fields']
if fld not in deck.models.fieldMap(m).keys():
break
# add the new field
f = deck.models.newField(fld)
deck.models.addField(m, f)
# loop through facts and write reference into new field
data = []
for id, flds in self.deck.db.execute(
"select id, flds from facts where id in "+
ids2str(deck.models.fids(m))):
sflds = splitFields(flds)
ref = all.replace(fname, pre+sflds[idx]+suf)
data.append((flds+ref, id))
# update facts
deck.db.executemany("update facts set flds=? where id=?",
data)
# note field for future
state['mflds'][fname] = fld
new = fld
# rewrite reference in template
t[key] = t[key].replace(all, "{{{%s}}}" % new)
regexps = deck.media.regexps + (
r"(\[latex\](.+?)\[/latex\])",
r"(\[\$\](.+?)\[/\$\])",
r"(\[\$\$\](.+?)\[/\$\$\])")
# process each model
for m in deck.models.all():
state = dict(mflds={}, fields=0)
for t in m['tmpls']:
for r in regexps:
for match in re.findall(r, t['qfmt']):
rewriteRef('qfmt')
for match in re.findall(r, t['afmt']):
rewriteRef('afmt')
if state['fields']:
deck.models.save(m)
# Inactive templates
######################################################################
# Templates can't be declared as inactive anymore. Remove any that are
# marked inactive and have no dependent cards.
def _removeInactive(self):
d = self.deck
for m in d.models.all():
remove = []
for t in m['tmpls']:
if not t['actv']:
if not d.db.scalar("""
select 1 from cards where fid in (select id from facts where mid = ?)
and ord = ? limit 1""", m['id'], t['ord']):
remove.append(t)
del t['actv']
for r in remove:
m['tmpls'].remove(t)
d.models.save(m)
# Upgrading deck
######################################################################
def _upgradeDeck(self):
"Handle the rest of the upgrade to 2.0."
import anki.deck
deck = self.deck
# make sure we have a current model id
deck.models.setCurrent(deck.models.models.values()[0])
# remove unused templates that were marked inactive
self._removeInactive()
# rewrite media references in card template
self._rewriteMediaRefs()
# template handling has changed
self._upgradeTemplates()
# regenerate css, and set new card order
for m in deck.models.all():
m['newOrder'] = deck.conf['oldNewOrder']
deck.models.save(m)
del deck.conf['oldNewOrder']
# fix creation time
deck.sched._updateCutoff()
d = datetime.datetime.today()
d -= datetime.timedelta(hours=4)
d = datetime.datetime(d.year, d.month, d.day)
d += datetime.timedelta(hours=4)
d -= datetime.timedelta(days=1+int((time.time()-deck.crt)/86400))
deck.crt = int(time.mktime(d.timetuple()))
deck.sched._updateCutoff()
# update uniq cache
deck.updateFieldCache(deck.db.list("select id from facts"))
# remove old views
for v in ("failedCards", "revCardsOld", "revCardsNew",
"revCardsDue", "revCardsRandom", "acqCardsRandom",
"acqCardsOld", "acqCardsNew"):
deck.db.execute("drop view if exists %s" % v)
# remove stats, as it's all in the revlog now
deck.db.execute("drop table if exists stats")
# suspended cards don't use ranges anymore
deck.db.execute("update cards set queue=-1 where queue between -3 and -1")
deck.db.execute("update cards set queue=-2 where queue between 3 and 5")
deck.db.execute("update cards set queue=-3 where queue between 6 and 8")
# remove old deleted tables
for t in ("cards", "facts", "models", "media"):
deck.db.execute("drop table if exists %sDeleted" % t)
# rewrite due times for new cards
deck.db.execute("""
update cards set due = fid where type=0""")
# and failed cards
left = len(deck.groups.conf(1)['new']['delays'])
deck.db.execute("update cards set edue = ?, left=? where type = 1",
deck.sched.today+1, left)
# and due cards
deck.db.execute("""
update cards set due = cast(
(case when due < :stamp then 0 else 1 end) +
((due-:stamp)/86400) as int)+:today where type = 2
""", stamp=deck.sched.dayCutoff, today=deck.sched.today)
# possibly re-randomize
if deck.models.randomNew():
deck.sched.randomizeCards()
# update insertion id
deck.conf['nextPos'] = deck.db.scalar("select max(id) from facts")+1
deck.save()
# optimize and finish
deck.db.commit()
deck.db.execute("vacuum")
deck.db.execute("analyze")
deck.db.execute("update deck set ver = ?", SCHEMA_VERSION)
deck.save()