new history table

- rename to revlog
- change the pk to time, as we want an index on time, and the old multi-column
  index was expensive and not useful
- remove yes/no count; they can be inferred from the ease
- remove lastFactor, as it's in the previous entry
- remove delay, it can be inferred from last entry
- remove 'next' from nextInterval and nextFactor
- rename 'thinkingTime' to 'userTime'
- rename reps to rep
- migrate old data to new table, and fix some problems in the process: ease0
  -> ease1, and limit thinking time to 60 seconds as it should have been
  previously
This commit is contained in:
Damien Elmes 2011-02-19 13:02:10 +09:00
parent 855de47ffe
commit b6bb03025f
9 changed files with 78 additions and 112 deletions

View file

@ -11,7 +11,7 @@ from anki.errors import DeckAccessError
from anki.stdmodels import BasicModel from anki.stdmodels import BasicModel
from anki.utils import parseTags, tidyHTML, genID, ids2str, hexifyID, \ from anki.utils import parseTags, tidyHTML, genID, ids2str, hexifyID, \
canonifyTags, joinTags, addTags, checksum, fieldChecksum canonifyTags, joinTags, addTags, checksum, fieldChecksum
from anki.history import CardHistoryEntry from anki.revlog import logReview
from anki.models import Model, CardModel, formatQA from anki.models import Model, CardModel, formatQA
from anki.fonts import toPlatformFont from anki.fonts import toPlatformFont
from anki.tags import initTagTables, tagIds from anki.tags import initTagTables, tagIds
@ -25,8 +25,7 @@ from anki.upgrade import upgradeSchema, updateIndices, upgradeDeck, DECK_VERSION
import anki.latex # sets up hook import anki.latex # sets up hook
# ensure all the DB metadata in other files is loaded before proceeding # ensure all the DB metadata in other files is loaded before proceeding
import anki.models, anki.facts, anki.cards import anki.models, anki.facts, anki.cards, anki.media
import anki.history, anki.media
# rest # rest
MATURE_THRESHOLD = 21 MATURE_THRESHOLD = 21
@ -207,7 +206,7 @@ class Deck(object):
self.factCount = self.s.scalar("select count(*) from facts") self.factCount = self.s.scalar("select count(*) from facts")
# day counts # day counts
(self.repsToday, self.newSeenToday) = self.s.first(""" (self.repsToday, self.newSeenToday) = self.s.first("""
select count(), sum(case when reps = 1 then 1 else 0 end) from reviewHistory select count(), sum(case when rep = 1 then 1 else 0 end) from revlog
where time > :t""", t=self.failedCutoff-86400) where time > :t""", t=self.failedCutoff-86400)
self.newSeenToday = self.newSeenToday or 0 self.newSeenToday = self.newSeenToday or 0
print "newSeenToday in answer(), reset called twice" print "newSeenToday in answer(), reset called twice"
@ -815,8 +814,8 @@ limit %s""" % (self.cramOrder, self.queueLimit)))
card.combinedDue = card.due card.combinedDue = card.due
card.toDB(self.s) card.toDB(self.s)
# review history # review history
entry = CardHistoryEntry(card, ease, lastDelay) print "make sure flags is set correctly when reviewing early"
entry.writeSQL(self.s) logReview(self.s, card, ease, 0)
self.modified = now self.modified = now
# remove from queue # remove from queue
self.requeueCard(card, oldSuc) self.requeueCard(card, oldSuc)

View file

@ -95,7 +95,7 @@ class AnkiExporter(Exporter):
if not self.includeSchedulingInfo: if not self.includeSchedulingInfo:
self.deck.updateProgress() self.deck.updateProgress()
self.newDeck.s.statement(""" self.newDeck.s.statement("""
delete from reviewHistory""") delete from revlog""")
self.newDeck.s.statement(""" self.newDeck.s.statement("""
update cards set update cards set
interval = 0, interval = 0,

View file

@ -116,8 +116,8 @@ select
count() as combinedNewReps, count() as combinedNewReps,
date(time-:off, "unixepoch") as day, date(time-:off, "unixepoch") as day,
sum(case when lastInterval > 21 then 1 else 0 end) as matureReps, sum(case when lastInterval > 21 then 1 else 0 end) as matureReps,
count() - sum(case when reps = 1 then 1 else 0 end) as combinedYoungReps, count() - sum(case when rep = 1 then 1 else 0 end) as combinedYoungReps,
sum(thinkingTime) as reviewTime from reviewHistory sum(userTime) as reviewTime from revlog
group by day order by day group by day order by day
""", off=self.deck.utcOffset) """, off=self.deck.utcOffset)
@ -362,8 +362,8 @@ group by day order by day
colours = [easesNewC, easesYoungC, easesMatureC] colours = [easesNewC, easesYoungC, easesMatureC]
bars = [] bars = []
eases = self.deck.s.all(""" eases = self.deck.s.all("""
select (case when reps = 1 then 0 when lastInterval <= 21 then 1 else 2 end) select (case when rep = 1 then 0 when lastInterval <= 21 then 1 else 2 end)
as type, ease, count() from reviewHistory group by type, ease""") as type, ease, count() from revlog group by type, ease""")
d = {} d = {}
for (type, ease, count) in eases: for (type, ease, count) in eases:
type = types[type] type = types[type]

View file

@ -1,64 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import time
from anki.db import *
# Legacy review-history schema: one row per recorded review, keyed by the
# (cardId, time) pair. Rows are written by CardHistoryEntry.writeSQL.
reviewHistoryTable = Table(
'reviewHistory', metadata,
# id of the reviewed card (CardHistoryEntry copies card.id)
Column('cardId', Integer, nullable=False),
# column default is time.time; writeSQL supplies its own time.time() value
Column('time', Float, nullable=False, default=time.time),
# card.lastInterval / card.interval at the moment the entry was built
Column('lastInterval', Float, nullable=False),
Column('nextInterval', Float, nullable=False),
# answer button chosen by the user
Column('ease', Integer, nullable=False),
# delay value handed to CardHistoryEntry by its caller
Column('delay', Float, nullable=False),
# card.lastFactor / card.factor at the moment the entry was built
Column('lastFactor', Float, nullable=False),
Column('nextFactor', Float, nullable=False),
# NOTE(review): reps, yesCount and noCount are copied from the card's
# counters but declared Float here -- presumably they hold whole numbers.
Column('reps', Float, nullable=False),
# result of card.thinkingTime()
Column('thinkingTime', Float, nullable=False),
Column('yesCount', Float, nullable=False),
Column('noCount', Float, nullable=False),
# composite key: the same time may repeat as long as cardId differs
PrimaryKeyConstraint("cardId", "time"))
class CardHistoryEntry(object):
    """One review of a card, ready to be written to reviewHistory.

    Build it after the card has been rescheduled, so that the card's
    interval and factor already hold the post-review values.
    """

    def __init__(self, card=None, ease=None, delay=None):
        """Copy the review details from ``card``.

        card  -- the card that was just answered; when omitted (or falsy)
                 no attributes are set at all.
        ease  -- the answer the user chose.
        delay -- delay value supplied by the caller.
        """
        if card:
            self.cardId = card.id
            # the pre-review values live in the card's last* attributes
            self.lastInterval = card.lastInterval
            self.lastFactor = card.lastFactor
            # the card's current values are the post-review ones
            self.nextInterval = card.interval
            self.nextFactor = card.factor
            self.reps = card.reps
            self.yesCount = card.yesCount
            self.noCount = card.noCount
            self.ease = ease
            self.delay = delay
            self.thinkingTime = card.thinkingTime()

    def writeSQL(self, s):
        """Insert this entry into reviewHistory through ``s.statement``.

        The row is stamped with the time of the call, not the time the
        entry was constructed.
        """
        query = """
insert into reviewHistory
(cardId, lastInterval, nextInterval, ease, delay, lastFactor,
nextFactor, reps, thinkingTime, yesCount, noCount, time)
values (
:cardId, :lastInterval, :nextInterval, :ease, :delay,
:lastFactor, :nextFactor, :reps, :thinkingTime, :yesCount, :noCount,
:time)"""
        params = dict(
            cardId=self.cardId,
            lastInterval=self.lastInterval,
            nextInterval=self.nextInterval,
            ease=self.ease,
            delay=self.delay,
            lastFactor=self.lastFactor,
            nextFactor=self.nextFactor,
            reps=self.reps,
            thinkingTime=self.thinkingTime,
            yesCount=self.yesCount,
            noCount=self.noCount,
            time=time.time())
        s.statement(query, **params)
# Associate CardHistoryEntry with reviewHistoryTable. mapper() arrives via the
# anki.db star-import -- presumably SQLAlchemy's classic mapper; TODO confirm.
mapper(CardHistoryEntry, reviewHistoryTable)

32
anki/revlog.py Normal file
View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import time
from anki.db import *
# Review log schema: one row per logged review.
# Flags: 0=standard review, 1=reschedule due to cram, drill, etc
# Rep: Repetition number. The same number may appear twice if a card has been
# manually rescheduled or answered on multiple sites before a sync.
# NOTE: the column order below is load-bearing. Sync unpacks
# `select * from revlog` rows by index, and the version-74 migration fills
# this table with a positional `insert ... select`. Append new columns at the
# end and update those call sites together.
revlogTable = Table(
'revlog', metadata,
# Unix timestamp of the review and the sole primary key, so two rows can
# never share the same time value.
Column('time', Float, nullable=False, primary_key=True, default=time.time),
Column('cardId', Integer, nullable=False),
# answer chosen by the user; the migration rewrites legacy ease 0 to 1
Column('ease', Integer, nullable=False),
# logReview stores card.reps here
Column('rep', Integer, nullable=False),
# interval before the review, and the interval the card has afterwards
Column('lastInterval', Float, nullable=False),
Column('interval', Float, nullable=False),
# card.factor after the review
Column('factor', Float, nullable=False),
# result of card.thinkingTime(); migrated rows are capped at 60
Column('userTime', Float, nullable=False),
Column('flags', Integer, nullable=False, default=0))
def logReview(db, card, ease, flags=0):
    """Append one row to the revlog table for a card that was just answered.

    db    -- session-like object exposing ``statement(sql, **params)``.
    card  -- the answered card, read *after* rescheduling: its ``reps``,
             ``lastInterval``, ``interval`` and ``factor`` attributes and its
             ``thinkingTime()`` result are stored as-is.
    ease  -- the answer the user chose.
    flags -- 0 for a standard review, 1 for a reschedule due to cram, drill,
             etc.

    The row is stamped with the current ``time.time()``.
    """
    # Name the target columns explicitly so the insert keeps working (or fails
    # loudly) if the table's column order ever changes, instead of silently
    # writing values into the wrong columns.
    db.statement("""
insert into revlog
(time, cardId, ease, rep, lastInterval, interval, factor, userTime, flags)
values (
:created, :cardId, :ease, :rep, :lastInterval, :interval, :factor,
:userTime, :flags)""",
                 created=time.time(), cardId=card.id, ease=ease, rep=card.reps,
                 lastInterval=card.lastInterval, interval=card.interval,
                 factor=card.factor, userTime=card.thinkingTime(),
                 flags=flags)

View file

@ -238,7 +238,7 @@ class DeckStats(object):
def getMatureCorrect(self, test=None): def getMatureCorrect(self, test=None):
if not test: if not test:
test = "lastInterval > 21" test = "lastInterval > 21"
head = "select count() from reviewHistory where %s" head = "select count() from revlog where %s"
all = self.deck.s.scalar(head % test) all = self.deck.s.scalar(head % test)
yes = self.deck.s.scalar((head % test) + " and ease > 1") yes = self.deck.s.scalar((head % test) + " and ease > 1")
return (all, yes, yes/float(all)*100) return (all, yes, yes/float(all)*100)
@ -254,7 +254,7 @@ class DeckStats(object):
x = today + 86400*start x = today + 86400*start
y = today + 86400*finish y = today + 86400*finish
return self.deck.s.scalar(""" return self.deck.s.scalar("""
select count(distinct(cast((time-:off)/86400 as integer))) from reviewHistory select count(distinct(cast((time-:off)/86400 as integer))) from revlog
where time >= :x and time <= :y""",x=x,y=y, off=self.deck.utcOffset) where time >= :x and time <= :y""",x=x,y=y, off=self.deck.utcOffset)
def getRepsDone(self, start, finish): def getRepsDone(self, start, finish):
@ -262,7 +262,7 @@ where time >= :x and time <= :y""",x=x,y=y, off=self.deck.utcOffset)
x = time.mktime((now + datetime.timedelta(start)).timetuple()) x = time.mktime((now + datetime.timedelta(start)).timetuple())
y = time.mktime((now + datetime.timedelta(finish)).timetuple()) y = time.mktime((now + datetime.timedelta(finish)).timetuple())
return self.deck.s.scalar( return self.deck.s.scalar(
"select count() from reviewHistory where time >= :x and time <= :y", "select count() from revlog where time >= :x and time <= :y",
x=x, y=y) x=x, y=y)
def getAverageInterval(self): def getAverageInterval(self):
@ -320,7 +320,7 @@ and type >= 0 and relativeDelay in (0,1)""", cutoff=cutoff) or 0) / float(period
def getPastWorkloadPeriod(self, period): def getPastWorkloadPeriod(self, period):
cutoff = time.time() - 86400 * period cutoff = time.time() - 86400 * period
return (self.deck.s.scalar(""" return (self.deck.s.scalar("""
select count(*) from reviewHistory select count(*) from revlog
where time > :cutoff""", cutoff=cutoff) or 0) / float(period) where time > :cutoff""", cutoff=cutoff) or 0) / float(period)
def getNewPeriod(self, period): def getNewPeriod(self, period):
@ -332,5 +332,5 @@ where created > :cutoff""", cutoff=cutoff) or 0)
def getFirstPeriod(self, period): def getFirstPeriod(self, period):
cutoff = time.time() - 86400 * period cutoff = time.time() - 86400 * period
return (self.deck.s.scalar(""" return (self.deck.s.scalar("""
select count(*) from reviewHistory select count(*) from revlog
where reps = 1 and time > :cutoff""", cutoff=cutoff) or 0) where rep = 1 and time > :cutoff""", cutoff=cutoff) or 0)

View file

@ -11,7 +11,6 @@ from anki.errors import *
from anki.models import Model, FieldModel, CardModel from anki.models import Model, FieldModel, CardModel
from anki.facts import Fact, Field from anki.facts import Fact, Field
from anki.cards import Card from anki.cards import Card
from anki.history import CardHistoryEntry
from anki.utils import ids2str, hexifyID, checksum from anki.utils import ids2str, hexifyID, checksum
from anki.media import mediaFiles from anki.media import mediaFiles
from anki.lang import _ from anki.lang import _
@ -585,34 +584,26 @@ insert or replace into deckVars
def bundleHistory(self): def bundleHistory(self):
return self.realLists(self.deck.s.all(""" return self.realLists(self.deck.s.all("""
select cardId, time, lastInterval, nextInterval, ease, delay, select * from revlog where time > :ls""",
lastFactor, nextFactor, reps, thinkingTime, yesCount, noCount
from reviewHistory where time > :ls""",
ls=self.deck.lastSync)) ls=self.deck.lastSync))
def updateHistory(self, history):
    """Store review-log rows received from the other side of a sync.

    history -- sequence of 9-item rows laid out in revlogTable column order:
               time, cardId, ease, rep, lastInterval, interval, factor,
               userTime, flags.

    Does nothing for an empty history. Rows that conflict with an existing
    row are skipped (``insert or ignore``) rather than raising.
    """
    dlist = [{'time': h[0],
              'cardId': h[1],
              'ease': h[2],
              'rep': h[3],
              'lastInterval': h[4],
              'interval': h[5],
              'factor': h[6],
              'userTime': h[7],
              'flags': h[8]} for h in history]
    if not dlist:
        return
    # The statement needs the `values` keyword before the bound parameters;
    # without it SQLite rejects the insert. The columns are named as well so
    # the statement does not depend on the table's physical column order.
    self.deck.s.statements("""
insert or ignore into revlog
(time, cardId, ease, rep, lastInterval, interval, factor, userTime, flags)
values
(:time, :cardId, :ease, :rep, :lastInterval, :interval, :factor,
:userTime, :flags)""",
                           dlist)
def bundleSources(self): def bundleSources(self):
return self.realLists(self.deck.s.all("select * from sources")) return self.realLists(self.deck.s.all("select * from sources"))
@ -834,7 +825,7 @@ and cards.id in %s""" % ids2str([c[0] for c in cards])))
if len(l) > 1000: if len(l) > 1000:
return True return True
if self.deck.s.scalar( if self.deck.s.scalar(
"select count() from reviewHistory where time > :ls", "select count() from revlog where time > :ls",
ls=self.deck.lastSync) > 1000: ls=self.deck.lastSync) > 1000:
return True return True
lastDay = date.fromtimestamp(max(0, self.deck.lastSync - 60*60*24)) lastDay = date.fromtimestamp(max(0, self.deck.lastSync - 60*60*24))

View file

@ -2,7 +2,7 @@
# Copyright: Damien Elmes <anki@ichi2.net> # Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html # License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
DECK_VERSION = 73 DECK_VERSION = 74
from anki.lang import _ from anki.lang import _
from anki.media import rebuildMediaDir from anki.media import rebuildMediaDir
@ -37,11 +37,6 @@ create index if not exists ix_cards_modified on cards
deck.s.statement(""" deck.s.statement("""
create index if not exists ix_facts_modified on facts create index if not exists ix_facts_modified on facts
(modified)""") (modified)""")
# priority - temporary index to make compat code faster. this can be
# removed when all clients are on 1.2, as can the ones below
deck.s.statement("""
create index if not exists ix_cards_priority on cards
(priority)""")
# card spacing # card spacing
deck.s.statement(""" deck.s.statement("""
create index if not exists ix_cards_factId on cards (factId)""") create index if not exists ix_cards_factId on cards (factId)""")
@ -237,6 +232,19 @@ this message. (ERR-0101)""") % {
deck.s.statement("drop table if exists stats") deck.s.statement("drop table if exists stats")
deck.version = 73 deck.version = 73
deck.s.commit() deck.s.commit()
if deck.version < 74:
    # Migrate review history into the new revlog table, capping thinking
    # time at 60 while copying. The old table was keyed on (cardId, time)
    # but revlog is keyed on time alone, so two legacy rows that share a
    # timestamp would violate the new primary key and abort the whole
    # upgrade; `or ignore` keeps the first such row and skips the rest.
    deck.s.statement("""
insert or ignore into revlog select
time, cardId, ease, reps, lastInterval, nextInterval, nextFactor,
min(thinkingTime, 60), 0 from reviewHistory""")
    deck.s.statement("drop table reviewHistory")
    # convert old ease0 into ease1
    deck.s.statement("update revlog set ease = 1 where ease = 0")
    # remove priority index; tolerate decks where it was never created
    deck.s.statement("drop index if exists ix_cards_priority")
    deck.version = 74
    deck.s.commit()
# executing a pragma here is very slow on large decks, so we store # executing a pragma here is very slow on large decks, so we store

View file

@ -103,7 +103,7 @@ def test_localsync_deck():
c = deck1.getCard() c = deck1.getCard()
deck1.answerCard(c, 4) deck1.answerCard(c, 4)
client.sync() client.sync()
assert deck2.s.scalar("select count(*) from reviewHistory") == 1 assert deck2.s.scalar("select count(*) from revlog") == 1
# make sure meta data is synced # make sure meta data is synced
deck1.setVar("foo", 1) deck1.setVar("foo", 1)
assert deck1.getInt("foo") == 1 assert deck1.getInt("foo") == 1