group scheduling refactor

see the following for background discussion:
http://groups.google.com/group/ankisrs-users/browse_thread/thread/4db5e82f7dff74fb

- change sched index to the more efficient gid, queue, due
- drop the dynamic index support. as there's no q/a cache anymore, it's
  cheap enough to hit the cards table directly, and we can't use the index in
  its new form.
- drop order by clauses (see todo)
- ensure there's always an active group. if users want to study all groups at
  once, they need to create a top level group. we do this because otherwise
  it is not clear which 'top level group' is active when everything is
  selected.

to do:

- new cards will appear in gid order, but the gid numbers don't reflect
  alphabetical sorting. we need to change the scheduling code so that it steps
  through each group in turn
- likewise for the learn queue
This commit is contained in:
Damien Elmes 2011-09-22 11:54:01 +09:00
parent dac46752ed
commit 024c42fef8
10 changed files with 76 additions and 109 deletions

View file

@ -16,9 +16,9 @@ NEW_TODAY_ORD = 0
NEW_TODAY_DUE = 1 NEW_TODAY_DUE = 1
# review card sort order # review card sort order
REV_CARDS_OLD_FIRST = 0 REV_CARDS_RANDOM = 0
REV_CARDS_NEW_FIRST = 1 REV_CARDS_OLD_FIRST = 1
REV_CARDS_RANDOM = 2 REV_CARDS_NEW_FIRST = 2
# removal types # removal types
REM_CARD = 0 REM_CARD = 0

View file

@ -66,11 +66,10 @@ class CramScheduler(Scheduler):
else: else:
maxlim = "" maxlim = ""
self.newQueue = self.deck.db.list(""" self.newQueue = self.deck.db.list("""
select id from cards where queue = 2 and due >= %d select id from cards where gid in %s and queue = 2 and due >= %d
%s %s order by %s limit %d""" % (self._groupLimit(),
%s order by %s limit %d""" % (self.today+1+self.min, self.today+1+self.min,
maxlim, maxlim,
self._groupLimit(),
self.order, self.order,
self.reportLimit)) self.reportLimit))
self.newCount = len(self.newQueue) self.newCount = len(self.newQueue)

View file

@ -20,9 +20,9 @@ import anki.cards, anki.facts, anki.template, anki.cram, anki.find
defaultConf = { defaultConf = {
# scheduling options # scheduling options
'activeGroups': [], 'activeGroups': [1],
'topGroup': 1, 'topGroup': 1,
'curGroup': None, 'curGroup': 1,
'revOrder': REV_CARDS_RANDOM, 'revOrder': REV_CARDS_RANDOM,
# other config # other config
'nextPos': 1, 'nextPos': 1,

View file

@ -114,12 +114,13 @@ class GroupManager(object):
g['conf'] = 1 g['conf'] = 1
while 1: while 1:
id = intTime(1000) id = intTime(1000)
if str(id) in self.groups: if str(id) not in self.groups:
continue break
g['id'] = id g['id'] = id
self.groups[str(id)] = g self.groups[str(id)] = g
self.save(g) self.save(g)
return int(id) self.maybeAddToActive(g)
return int(id)
def rem(self, gid, cardsToo=False): def rem(self, gid, cardsToo=False):
"Remove the group. If cardsToo, delete any cards inside." "Remove the group. If cardsToo, delete any cards inside."
@ -179,9 +180,14 @@ class GroupManager(object):
def update(self, g): def update(self, g):
"Add or update an existing group. Used for syncing and merging." "Add or update an existing group. Used for syncing and merging."
self.groups[str(g['id'])] = g self.groups[str(g['id'])] = g
self.maybeAddToActive(g)
# mark registry changed, but don't bump mod time # mark registry changed, but don't bump mod time
self.save() self.save()
def maybeAddToActive(self, g):
# since order is important, we can't just append to the end
self.select(self.selected())
def updateConf(self, g): def updateConf(self, g):
self.gconf[str(g['id'])] = g self.gconf[str(g['id'])] = g
self.save() self.save()
@ -209,16 +215,11 @@ usn=?,mod=? where id in %s""" % ids2str(cids),
return self.deck.conf['activeGroups'] return self.deck.conf['activeGroups']
def selected(self): def selected(self):
"The currently selected gid, or None if whole collection." "The currently selected gid."
return self.deck.conf['curGroup'] return self.deck.conf['curGroup']
def select(self, gid): def select(self, gid):
"Select a new group. If gid is None, select whole collection." "Select a new branch."
if not gid:
self.deck.conf['topGroup'] = 1
self.deck.conf['curGroup'] = None
self.deck.conf['activeGroups'] = []
return
# save the top level group # save the top level group
name = self.groups[str(gid)]['name'] name = self.groups[str(gid)]['name']
self.deck.conf['topGroup'] = self.topFor(name) self.deck.conf['topGroup'] = self.topFor(name)
@ -235,3 +236,9 @@ usn=?,mod=? where id in %s""" % ids2str(cids),
"The top level gid for NAME." "The top level gid for NAME."
path = name.split("::") path = name.split("::")
return self.id(path[0]) return self.id(path[0])
def underSelected(self, name):
"True if name is under the selected group."
# if nothing is selected, always true
s = self.selected()
return name.startswith(self.get(s)['name'])

View file

@ -73,7 +73,7 @@ class Scheduler(object):
"Return counts over next DAYS. Includes today." "Return counts over next DAYS. Includes today."
daysd = dict(self.deck.db.all(""" daysd = dict(self.deck.db.all("""
select due, count() from cards select due, count() from cards
where queue = 2 %s where gid in %s and queue = 2
and due between ? and ? and due between ? and ?
group by due group by due
order by due""" % self._groupLimit(), order by due""" % self._groupLimit(),
@ -230,13 +230,13 @@ select 1 from cards where gid = ? and
else: else:
self.newCount = self.deck.db.scalar(""" self.newCount = self.deck.db.scalar("""
select count() from (select id from cards where select count() from (select id from cards where
queue = 0 %s limit %d)""" % (self._groupLimit(), lim)) gid in %s and queue = 0 limit %d)""" % (self._groupLimit(), lim))
def _resetNew(self): def _resetNew(self):
lim = min(self.queueLimit, self.newCount) lim = min(self.queueLimit, self.newCount)
self.newQueue = self.deck.db.all(""" self.newQueue = self.deck.db.all("""
select id, due from cards where select id, due from cards where
queue = 0 %s order by due limit %d""" % (self._groupLimit(), gid in %s and queue = 0 limit %d""" % (self._groupLimit(),
lim)) lim))
self.newQueue.reverse() self.newQueue.reverse()
self._updateNewCardRatio() self._updateNewCardRatio()
@ -289,14 +289,14 @@ queue = 0 %s order by due limit %d""" % (self._groupLimit(),
self._updateStatsDay("lrn") self._updateStatsDay("lrn")
self.lrnCount = self.deck.db.scalar(""" self.lrnCount = self.deck.db.scalar("""
select count() from (select id from cards where select count() from (select id from cards where
queue = 1 %s and due < ? limit %d)""" % ( gid in %s and queue = 1 and due < ? limit %d)""" % (
self._groupLimit(), self.reportLimit), self._groupLimit(), self.reportLimit),
intTime() + self.deck.groups.top()['collapseTime']) intTime() + self.deck.groups.top()['collapseTime'])
def _resetLrn(self): def _resetLrn(self):
self.lrnQueue = self.deck.db.all(""" self.lrnQueue = self.deck.db.all("""
select due, id from cards where select due, id from cards where
queue = 1 %s and due < :lim order by due gid in %s and queue = 1 and due < :lim
limit %d""" % (self._groupLimit(), self.reportLimit), lim=self.dayCutoff) limit %d""" % (self._groupLimit(), self.reportLimit), lim=self.dayCutoff)
def _getLrnCard(self, collapse=False): def _getLrnCard(self, collapse=False):
@ -417,22 +417,19 @@ where queue = 1 and type = 2
self._updateStatsDay("rev") self._updateStatsDay("rev")
self.revCount = self.deck.db.scalar(""" self.revCount = self.deck.db.scalar("""
select count() from (select id from cards where select count() from (select id from cards where
queue = 2 %s and due <= :lim limit %d)""" % ( gid in %s and queue = 2 and due <= :lim limit %d)""" % (
self._groupLimit(), self.reportLimit), self._groupLimit(), self.reportLimit),
lim=self.today) lim=self.today)
def _resetRev(self): def _resetRev(self):
self.revQueue = self.deck.db.list(""" self.revQueue = self.deck.db.list("""
select id from cards where select id from cards where
queue = 2 %s and due <= :lim order by %s limit %d""" % ( gid in %s and queue = 2 and due <= :lim %s limit %d""" % (
self._groupLimit(), self._revOrder(), self.queueLimit), self._groupLimit(), self._revOrder(), self.queueLimit),
lim=self.today) lim=self.today)
if self.deck.conf['revOrder'] == REV_CARDS_RANDOM: r = random.Random()
r = random.Random() r.seed(self.today)
r.seed(self.today) r.shuffle(self.revQueue)
r.shuffle(self.revQueue)
else:
self.revQueue.reverse()
def _getRevCard(self): def _getRevCard(self):
if self._haveRevCards(): if self._haveRevCards():
@ -446,9 +443,9 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
return self.revQueue return self.revQueue
def _revOrder(self): def _revOrder(self):
return ("ivl desc", if self.deck.conf['revOrder']:
"ivl", return "order by %s" % ("ivl desc", "ivl")[self.deck.conf['revOrder']-1]
"due")[self.deck.conf['revOrder']] return ""
# Answering a review card # Answering a review card
########################################################################## ##########################################################################
@ -585,11 +582,7 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
return self.deck.groups.conf(card.gid) return self.deck.groups.conf(card.gid)
def _groupLimit(self): def _groupLimit(self):
l = self.deck.groups.active() return ids2str(self.deck.groups.active())
if not l:
# everything
return ""
return " and gid in %s" % ids2str(l)
# Daily cutoff # Daily cutoff
########################################################################## ##########################################################################
@ -645,16 +638,15 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
def revTomorrow(self): def revTomorrow(self):
"Number of reviews due tomorrow." "Number of reviews due tomorrow."
return self.deck.db.scalar( return self.deck.db.scalar(
"select count() from cards where queue = 2 and due = ?"+ "select count() from cards where gid in %s and queue = 2 and due = ?"%
self._groupLimit(), self._groupLimit(), self.today+1)
self.today+1)
def newTomorrow(self): def newTomorrow(self):
"Number of new cards tomorrow." "Number of new cards tomorrow."
lim = self.deck.groups.top()['newPerDay'] lim = self.deck.groups.top()['newPerDay']
return self.deck.db.scalar( return self.deck.db.scalar(
"select count() from (select id from cards where " "select count() from (select id from cards where "
"queue = 0 %s limit %d)" % (self._groupLimit(), lim)) "gid in %s and queue = 0 limit %d)" % (self._groupLimit(), lim))
# Next time reports # Next time reports
########################################################################## ##########################################################################
@ -732,31 +724,6 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
"Number of cards answered today." "Number of cards answered today."
return sum(self.counts()) return sum(self.counts())
# Dynamic indices
##########################################################################
# fixme: warn user that the default is faster
def updateDynamicIndices(self):
"Call this after revOrder is changed. Bumps schema."
# determine required columns
required = []
if self.deck.conf['revOrder'] in (
REV_CARDS_OLD_FIRST, REV_CARDS_NEW_FIRST):
required.append("interval")
cols = ["queue", "due", "gid"] + required
# update if changed
if self.deck.db.scalar(
"select 1 from sqlite_master where name = 'ix_cards_multi'"):
rows = self.deck.db.all("pragma index_info('ix_cards_multi')")
else:
rows = None
if not (rows and cols == [r[2] for r in rows]):
self.deck.db.execute("drop index if exists ix_cards_multi")
self.deck.db.execute("create index ix_cards_multi on cards (%s)" %
", ".join(cols))
self.deck.db.execute("analyze")
self.deck.modSchema()
# Resetting # Resetting
########################################################################## ##########################################################################

View file

@ -97,11 +97,10 @@ class DeckStats(object):
self.width = 600 self.width = 600
self.height = 200 self.height = 200
def report(self, type=0, selective=True): def report(self, type=0):
# 0=days, 1=weeks, 2=months # 0=days, 1=weeks, 2=months
# period-dependent graphs # period-dependent graphs
self.type = type self.type = type
self.selective = selective
txt = self.css txt = self.css
txt += self.dueGraph() txt += self.dueGraph()
txt += self.repsGraph() txt += self.repsGraph()
@ -179,7 +178,7 @@ select (due-:today)/:chunk as day,
sum(case when ivl < 21 then 1 else 0 end), -- yng sum(case when ivl < 21 then 1 else 0 end), -- yng
sum(case when ivl >= 21 then 1 else 0 end) -- mtr sum(case when ivl >= 21 then 1 else 0 end) -- mtr
from cards from cards
where queue = 2 %s where gid in %s and queue = 2
%s %s
group by day order by day""" % (self._limit(), lim), group by day order by day""" % (self._limit(), lim),
today=self.deck.sched.today, today=self.deck.sched.today,
@ -392,11 +391,11 @@ group by day order by day)""" % lim,
chunk = 30; lim = "" chunk = 30; lim = ""
data = [self.deck.db.all(""" data = [self.deck.db.all("""
select ivl / :chunk as grp, count() from cards select ivl / :chunk as grp, count() from cards
where queue = 2 %s %s where gid in %s and queue = 2 %s
group by grp group by grp
order by grp""" % (self._limit(), lim), chunk=chunk)] order by grp""" % (self._limit(), lim), chunk=chunk)]
return data + list(self.deck.db.first(""" return data + list(self.deck.db.first("""
select count(), avg(ivl), max(ivl) from cards where queue = 2 %s""" % select count(), avg(ivl), max(ivl) from cards where gid in %s and queue = 2""" %
self._limit())) self._limit()))
# Eases # Eases
@ -540,7 +539,7 @@ group by hour having count() > 30 order by hour""" % lim,
i = [] i = []
(c, f) = self.deck.db.first(""" (c, f) = self.deck.db.first("""
select count(id), count(distinct fid) from cards select count(id), count(distinct fid) from cards
where 1 """ + self._limit()) where gid in %s """ % self._limit())
self._line(i, _("Total cards"), c) self._line(i, _("Total cards"), c)
self._line(i, _("Total facts"), f) self._line(i, _("Total facts"), f)
(low, avg, high) = self._factors() (low, avg, high) = self._factors()
@ -549,7 +548,7 @@ where 1 """ + self._limit())
self._line(i, _("Average ease factor"), "%d%%" % avg) self._line(i, _("Average ease factor"), "%d%%" % avg)
self._line(i, _("Highest ease factor"), "%d%%" % high) self._line(i, _("Highest ease factor"), "%d%%" % high)
min = self.deck.db.scalar( min = self.deck.db.scalar(
"select min(id) from cards where 1 " + self._limit()) "select min(id) from cards where gid in %s " % self._limit())
if min: if min:
self._line(i, _("First card created"), _("%s ago") % fmtTimeSpan( self._line(i, _("First card created"), _("%s ago") % fmtTimeSpan(
time.time() - (min/1000))) time.time() - (min/1000)))
@ -580,7 +579,7 @@ select
min(factor) / 10.0, min(factor) / 10.0,
avg(factor) / 10.0, avg(factor) / 10.0,
max(factor) / 10.0 max(factor) / 10.0
from cards where queue = 2 %s""" % self._limit()) from cards where gid in %s and queue = 2""" % self._limit())
def _cards(self): def _cards(self):
return self.deck.db.first(""" return self.deck.db.first("""
@ -589,7 +588,7 @@ sum(case when queue=2 and ivl >= 21 then 1 else 0 end), -- mtr
sum(case when queue=1 or (queue=2 and ivl < 21) then 1 else 0 end), -- yng/lrn sum(case when queue=1 or (queue=2 and ivl < 21) then 1 else 0 end), -- yng/lrn
sum(case when queue=0 then 1 else 0 end), -- new sum(case when queue=0 then 1 else 0 end), -- new
sum(case when queue=-1 then 1 else 0 end) -- susp sum(case when queue=-1 then 1 else 0 end) -- susp
from cards where 1 %s""" % self._limit()) from cards where gid in %s""" % self._limit())
# Tools # Tools
###################################################################### ######################################################################
@ -669,18 +668,11 @@ $(function () {
data=simplejson.dumps(data), conf=simplejson.dumps(conf))) data=simplejson.dumps(data), conf=simplejson.dumps(conf)))
def _limit(self): def _limit(self):
if self.selective: return self.deck.sched._groupLimit()
return self.deck.sched._groupLimit()
else:
return ""
def _revlogLimit(self): def _revlogLimit(self):
lim = self.deck.groups.active() return ("cid in (select id from cards where gid in %s)" %
if self.selective and lim: ids2str(self.deck.groups.active()))
return ("cid in (select id from cards where gid in %s)" %
ids2str(lim))
else:
return ""
def _title(self, title, subtitle=""): def _title(self, title, subtitle=""):
return '<h1>%s</h1>%s' % (title, subtitle) return '<h1>%s</h1>%s' % (title, subtitle)

View file

@ -27,6 +27,7 @@ def Deck(path, queue=True, lock=True):
ver = _createDB(db) ver = _createDB(db)
else: else:
ver = _upgradeSchema(db) ver = _upgradeSchema(db)
db.execute("pragma temp_store = memory")
db.execute("pragma cache_size = 10000") db.execute("pragma cache_size = 10000")
# add db to deck and do any remaining upgrades # add db to deck and do any remaining upgrades
deck = _Deck(db) deck = _Deck(db)
@ -156,6 +157,8 @@ def _updateIndices(db):
create index if not exists ix_facts_usn on facts (usn); create index if not exists ix_facts_usn on facts (usn);
-- card spacing, etc -- card spacing, etc
create index if not exists ix_cards_fid on cards (fid); create index if not exists ix_cards_fid on cards (fid);
-- scheduling and group limiting
create index if not exists ix_cards_sched on cards (gid, queue, due);
-- revlog by card -- revlog by card
create index if not exists ix_revlog_cid on revlog (cid); create index if not exists ix_revlog_cid on revlog (cid);
-- revlog syncing -- revlog syncing
@ -543,7 +546,6 @@ update cards set due = cast(
deck.save() deck.save()
# optimize and finish # optimize and finish
deck.sched.updateDynamicIndices()
deck.db.commit() deck.db.commit()
deck.db.execute("vacuum") deck.db.execute("vacuum")
deck.db.execute("analyze") deck.db.execute("analyze")

View file

@ -14,25 +14,26 @@ def test_basic():
assert len(deck.groups.groups) == 2 assert len(deck.groups.groups) == 2
# should get the same id # should get the same id
assert deck.groups.id("new group") == parentId assert deck.groups.id("new group") == parentId
# by default, everything should be shown # we start with the default group selected
assert not deck.groups.selected()
assert not deck.groups.active()
# and the default group is used
assert deck.groups.top()['id'] == 1
# we can select the default explicitly
deck.groups.select(1)
assert deck.groups.selected() == 1 assert deck.groups.selected() == 1
assert deck.groups.active() == [1] assert deck.groups.active() == [1]
assert deck.groups.top()['id'] == 1 assert deck.groups.top()['id'] == 1
# let's create a child and select that # we can select a different group
deck.groups.select(parentId)
assert deck.groups.selected() == parentId
assert deck.groups.active() == [parentId]
assert deck.groups.top()['id'] == parentId
# let's create a child
childId = deck.groups.id("new group::child") childId = deck.groups.id("new group::child")
# it should have been added to the active list
assert deck.groups.selected() == parentId
assert deck.groups.active() == [parentId, childId]
assert deck.groups.top()['id'] == parentId
# we can select the child individually too
deck.groups.select(childId) deck.groups.select(childId)
assert deck.groups.selected() == childId assert deck.groups.selected() == childId
assert deck.groups.active() == [childId] assert deck.groups.active() == [childId]
assert deck.groups.top()['id'] == parentId assert deck.groups.top()['id'] == parentId
# if we select the parent, the child gets included
deck.groups.select(parentId)
assert sorted(deck.groups.active()) == [parentId, childId]
def test_remove(): def test_remove():
deck = getEmptyDeck() deck = getEmptyDeck()

View file

@ -589,7 +589,7 @@ def test_ordcycle():
def test_counts_down(): def test_counts_down():
d = getEmptyDeck() d = getEmptyDeck()
# add a second group # add a second group
grp = d.groups.id("new group") grp = d.groups.id("Default::new group")
# for each card type # for each card type
for type in range(3): for type in range(3):
# and each of the groups # and each of the groups
@ -609,7 +609,7 @@ def test_counts_down():
# with the default settings, there's no count limit # with the default settings, there's no count limit
assert d.sched.counts() == (2,2,2) assert d.sched.counts() == (2,2,2)
# check limit to one group # check limit to one group
d.groups.select(1) d.groups.select(grp)
d.reset() d.reset()
assert d.sched.counts() == (1,1,1) assert d.sched.counts() == (1,1,1)
@ -702,7 +702,6 @@ def test_groupCounts():
foobaz = f.gid = d.groups.id("foo::baz") foobaz = f.gid = d.groups.id("foo::baz")
d.addFact(f) d.addFact(f)
d.reset() d.reset()
assert d.sched.counts() == (3, 0, 1)
assert len(d.groups.groups) == 5 assert len(d.groups.groups) == 5
cnts = d.sched.groupCounts() cnts = d.sched.groupCounts()
assert cnts[0] == ["Default", 1, 0, 1] assert cnts[0] == ["Default", 1, 0, 1]

View file

@ -10,7 +10,7 @@ def test_op():
assert not d.undoName() assert not d.undoName()
# let's adjust a study option # let's adjust a study option
d.save("studyopts") d.save("studyopts")
d.conf['revOrder'] = 5 d.conf['abc'] = 5
# it should be listed as undoable # it should be listed as undoable
assert d.undoName() == "studyopts" assert d.undoName() == "studyopts"
# with about 5 minutes until it's clobbered # with about 5 minutes until it's clobbered
@ -18,7 +18,7 @@ def test_op():
# undoing should restore the old value # undoing should restore the old value
d.undo() d.undo()
assert not d.undoName() assert not d.undoName()
assert d.conf['revOrder'] != 5 assert 'abc' not in d.conf
# an (auto)save will clear the undo # an (auto)save will clear the undo
d.save("foo") d.save("foo")
assert d.undoName() == "foo" assert d.undoName() == "foo"