Anki/anki/upgrade.py
Damien Elmes 1d6dbf9900 rework tag handling and remove cardTags
The tags tables were initially added to speed up the loading of the browser by
speeding up two operations: gathering a list of all tags to show in the
dropdown box, and finding cards with a given tag. The former functionality is
provided by the tags table, and the latter functionality by the cardTags
table.

Selective study is handled by groups now, which perform better since they
don't require a join or subselect, and can be embedded in the index. So the
only remaining benefit of cardTags is for the browser.

Performance testing indicates that cardTags is not saving us a large amount.
It only takes us 30ms to search a 50k card table for matches with a hot cache.
On a cold cache it means the facts table has to be loaded into memory, which
roughly doubles the load time with the default settings (we need to load the
cards table too, as we're sorting the cards), but that startup time was
necessary with certain settings in the past too (sorting by fact created for
example). With groups implemented, the cost of maintaining a cache just for
initial browser load time is hard to justify.

Other changes:

- the tags table has any missing tags added to it when facts are added/edited.
  This means old tags will stick around even when no cards reference them, but
  is much cheaper than reference counting or a separate table, and simplifies
  updates and syncing.
- the tags table has a modified field now so we can sync it instead of
  having to scan all facts coming across in a sync
- priority field removed
- we no longer put model names or card templates into the tags table. There
  were two reasons we did this in the past: so we could cram/selective study
  them, and for plugins. Selective study uses groups now, and plugins can
  check the model's name instead (and most already do). This also does away
  with the somewhat confusing behaviour of names also being tags.
- facts have their tags as _tags now. You can get a list with tags(), but
  editing operations should use add/deleteTags() instead of manually editing
  the string.
2011-04-28 09:23:29 +09:00

253 lines
9.7 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
# Current deck version: upgradeDeck() runs its upgrade steps on any deck
# whose stored version is lower than this.
DECK_VERSION = 100
import time, simplejson
from anki.db import *
from anki.lang import _
from anki.media import rebuildMediaDir
from anki.utils import intTime
def moveTable(s, table):
    """Move the contents of `table` into a temporary table `<table>2`.

    The temporary table is created from the original table's own CREATE
    statement (as stored in sqlite_master), all rows are copied across, and
    the original table is then dropped.

    s -- object providing scalar(sql) and execute(sql)
    table -- name of the table to move
    """
    tempName = "%s2" % table
    createSql = s.scalar(
        "select sql from sqlite_master where name = '%s'" % table)
    # reuse the stored definition, retargeted at the temporary copy
    createSql = createSql.replace(
        "TABLE " + table, "temporary table " + tempName)
    for stmt in (
            createSql,
            "insert into %s select * from %s" % (tempName, table),
            "drop table %s" % table):
        s.execute(stmt)
def upgradeSchema(engine, s):
    """Alter tables prior to ORM initialization.

    The deck version is read from the `deck` table, falling back to the
    `decks` table when that query fails. Decks with a version below 99 have
    their fields, cards, tags, facts, media, deck and models tables rebuilt
    in the current layout.

    engine -- passed to metadata.create_all() and DeckStorage._addTables()
    s -- object providing scalar() and execute() for raw SQL

    Returns the version that was read (before any migration).
    Raises Exception("oldDeckVersion") if the version is below 65.
    """
    try:
        ver = s.scalar("select version from deck limit 1")
    except Exception:
        # query failed - presumably a not-yet-upgraded deck that only has
        # the old `decks` table. Only ordinary errors are caught, so that
        # KeyboardInterrupt/SystemExit still propagate.
        ver = s.scalar("select version from decks limit 1")
    if ver < 65:
        raise Exception("oldDeckVersion")
    if ver < 99:
        # fields
        ###########
        s.execute(
            "alter table fields add column chksum text not null default ''")
        # cards
        ###########
        moveTable(s, "cards")
        import cards
        metadata.create_all(engine, tables=[cards.cardsTable])
        s.execute("""
insert into cards select id, factId, 1, cardModelId, cast(modified as int),
question, answer, ordinal, 0, relativeDelay, type, due, cast(interval as int),
cast(factor*1000 as int), reps, successive, noCount, 0, 0 from cards2""")
        s.execute("drop table cards2")
        # tags
        ###########
        moveTable(s, "tags")
        import deck
        deck.DeckStorage._addTables(engine)
        s.execute("insert or ignore into tags select id, :t, tag from tags2",
                  {'t':intTime()})
        # tags should have a leading and trailing space if not empty, and not
        # use commas. Replacing a comma with a space can leave two adjacent
        # spaces (e.g. from ", "), so doubled spaces are collapsed afterwards.
        s.execute("""
update facts set tags = (case
when trim(tags) == "" then ""
else " " || replace(replace(trim(tags), ",", " "), "  ", " ") || " "
end)
""")
        s.execute("drop table tags2")
        s.execute("drop table cardTags")
        # facts
        ###########
        s.execute("""
create table facts2
(id, modelId, modified, tags, cache)""")
        # use the rowid to give them an integer order
        s.execute("""
insert into facts2 select id, modelId, modified, tags, spaceUntil from
facts order by created""")
        s.execute("drop table facts")
        import facts
        metadata.create_all(engine, tables=[facts.factsTable])
        s.execute("""
insert or ignore into facts select id, modelId, rowid,
cast(modified as int), tags, cache from facts2""")
        s.execute("drop table facts2")
        # media
        ###########
        moveTable(s, "media")
        import media
        metadata.create_all(engine, tables=[media.mediaTable])
        s.execute("""
insert or ignore into media select id, filename, size, cast(created as int),
originalPath from media2""")
        s.execute("drop table media2")
        # deck
        ###########
        migrateDeck(s, engine)
        # models
        ###########
        moveTable(s, "models")
        import models
        metadata.create_all(engine, tables=[models.modelsTable])
        s.execute("""
insert or ignore into models select id, cast(modified as int), name, "" from models2""")
        s.execute("drop table models2")
    return ver
def migrateDeck(s, engine):
    """Create the new `deck` table and fill it from `decks` and `deckVars`.

    Rows are copied from `decks`, the qconf/config/data JSON columns are
    built from the defaults in the deck module plus the old settings, and
    finally the `decks` and `deckVars` tables are dropped.

    s -- object providing execute() for raw SQL
    engine -- passed to metadata.create_all()
    """
    import deck

    def oldSetting(column):
        # first value of `column` in the old decks table
        return s.execute("select %s from decks" % column).scalar()

    metadata.create_all(engine, tables=[deck.deckTable])
    s.execute("""
insert into deck select id, cast(created as int), cast(modified as int),
0, 99, ifnull(syncName, ""), cast(lastSync as int),
utcOffset, "", "", "" from decks""")
    # queue settings start out from the current defaults
    qconf = deck.defaultQconf.copy()
    # the old selective study settings can't be auto-upgraded easily, so
    # they are discarded
    for stale in ("newActive", "newInactive", "revActive", "revInactive"):
        s.execute("delete from deckVars where key=:k", {'k': stale})
    # carry over the other settings; deck order is ignored as there's a new
    # default
    for name in ("newCardOrder", "newCardSpacing"):
        qconf[name] = oldSetting(name)
    qconf['newPerDay'] = oldSetting("newCardsPerDay")
    # the remaining settings come from the decks table too
    conf = deck.defaultConf.copy()
    for name in ("sessionRepLimit", "sessionTimeLimit"):
        conf[name] = oldSetting(name)
    # random and due options merged
    qconf['revCardOrder'] = min(2, qconf['revCardOrder'])
    # no reverse option anymore
    qconf['newCardOrder'] = min(1, qconf['newCardOrder'])
    # deck vars: the two caches go into data, everything else into config
    data = {}
    for (key, value) in s.execute("select * from deckVars").fetchall():
        target = data if key in ("hexCache", "cssCache") else conf
        target[key] = value
    payload = {
        'l': simplejson.dumps(qconf),
        'c': simplejson.dumps(conf),
        'd': simplejson.dumps(data),
    }
    s.execute("update deck set qconf = :l, config = :c, data = :d", payload)
    # clean up
    for oldTable in ("decks", "deckVars"):
        s.execute("drop table %s" % oldTable)
def updateIndices(db):
    """Create the standard indices on `db`.

    Every statement uses "if not exists", so indices that are already
    present are left alone. `db` only needs an execute(sql) method.
    """
    indexStatements = (
        # sync summaries
        """
create index if not exists ix_cards_modified on cards
(modified)""",
        """
create index if not exists ix_facts_modified on facts
(modified)""",
        # card spacing
        """
create index if not exists ix_cards_factId on cards (factId)""",
        # fields
        """
create index if not exists ix_fields_factId on fields (factId)""",
        """
create index if not exists ix_fields_chksum on fields (chksum)""",
        # media
        """
create index if not exists ix_media_chksum on media (chksum)""",
        # deletion tracking
        """
create index if not exists ix_gravestones_delTime on gravestones (delTime)""",
    )
    for sql in indexStatements:
        db.execute(sql)
def upgradeDeck(deck):
    """Upgrade deck to the latest version.

    When deck.version is below DECK_VERSION a progress dialog is shown for
    the duration of the upgrade. For decks below version 100 this drops
    obsolete indices, views and tables, migrates reviewHistory into revlog,
    rewrites card queues and due values, rebuilds indices, and stores
    version 100 before committing.

    deck -- the deck object to upgrade; modified in place.

    An AssertionError is raised if deck.modified changed during an upgrade.
    """
    if deck.version < DECK_VERSION:
        prog = True
        deck.startProgress()
        deck.updateProgress(_("Upgrading Deck..."))
        # remembered so we can check the upgrade left it untouched
        oldmod = deck.modified
    else:
        prog = False
    if deck.version < 100:
        # update dynamic indices given we don't use priority anymore
        for d in ("intervalDesc", "intervalAsc", "randomOrder",
                  "dueAsc", "dueDesc"):
            deck.db.statement("drop index if exists ix_cards_%s2" % d)
            deck.db.statement("drop index if exists ix_cards_%s" % d)
        # remove old views
        for v in ("failedCards", "revCardsOld", "revCardsNew",
                  "revCardsDue", "revCardsRandom", "acqCardsRandom",
                  "acqCardsOld", "acqCardsNew"):
            deck.db.statement("drop view if exists %s" % v)
        # remove the expensive value cache
        deck.db.statement("drop index if exists ix_fields_value")
        # add checksums and index
        deck.updateAllFieldChecksums()
        # this was only used for calculating average factor
        deck.db.statement("drop index if exists ix_cards_factor")
        # remove stats, as it's all in the revlog now
        deck.db.statement("drop table if exists stats")
        # migrate revlog data to new table
        deck.db.statement("""
insert or ignore into revlog select
cast(time*1000 as int), cardId, ease, reps,
cast(lastInterval as int), cast(nextInterval as int),
cast(nextFactor*1000 as int), cast(min(thinkingTime, 60)*1000 as int),
0 from reviewHistory""")
        deck.db.statement("drop table reviewHistory")
        # convert old ease0 into ease1
        deck.db.statement("update revlog set ease = 1 where ease = 0")
        # remove priority index
        deck.db.statement("drop index if exists ix_cards_priority")
        # suspended cards don't use ranges anymore
        deck.db.execute("update cards set queue=-1 where queue between -3 and -1")
        deck.db.execute("update cards set queue=-2 where queue between 3 and 5")
        deck.db.execute("update cards set queue=-3 where queue between 6 and 8")
        # don't need an index on fieldModelId
        deck.db.statement("drop index if exists ix_fields_fieldModelId")
        # update schema time
        deck.db.statement("update deck set schemaMod = :t", t=intTime())
        # remove queueDue as it's become dynamic, and type index
        deck.db.statement("drop index if exists ix_cards_queueDue")
        deck.db.statement("drop index if exists ix_cards_type")
        # remove old deleted tables
        for t in ("cards", "facts", "models", "media"):
            deck.db.statement("drop table if exists %sDeleted" % t)
        # finally, update indices & optimize
        updateIndices(deck.db)
        # rewrite due times for new cards
        deck.db.statement("""
update cards set due = (select pos from facts where factId = facts.id) where type=2""")
        # convert due cards into day-based due
        deck.db.statement("""
update cards set due = cast(
(case when due < :stamp then 0 else 1 end) +
((due-:stamp)/86400) as int)+:today where type
between 0 and 1""", stamp=deck.sched.dayCutoff, today=deck.sched.today)
        # diagnostic output; written so it parses and prints identically
        # on both Python 2 and Python 3
        print("today %s" % (deck.sched.today,))
        print("cut %s" % (deck.sched.dayCutoff,))
        # setup qconf & config for dynamicIndices()
        deck.qconf = simplejson.loads(deck._qconf)
        deck.config = simplejson.loads(deck._config)
        deck.data = simplejson.loads(deck._data)
        # update factPos; max() is NULL when the facts table is empty, in
        # which case numbering starts at 1 instead of failing on None + 1
        maxPos = deck.db.scalar("select max(pos) from facts")
        deck.config['nextFactPos'] = (maxPos or 0) + 1
        deck.flushConfig()
        # add default config
        import deck as deckMod
        deckMod.DeckStorage._addConfig(deck.engine)
        deck.updateDynamicIndices()
        deck.db.execute("vacuum")
        deck.db.execute("analyze")
        deck.version = 100
        deck.db.commit()
    if prog:
        # the upgrade must not have changed the deck's modification time
        assert deck.modified == oldmod
        deck.finishProgress()