group scheduling refactor

see the following for background discussion:
http://groups.google.com/group/ankisrs-users/browse_thread/thread/4db5e82f7dff74fb

- change sched index to the more efficient gid, queue, due
- drop the dynamic index support. as there's no q/a cache anymore, it's
  cheap enough to hit the cards table directly, and we can't use the index in
  its new form.
- drop order by clauses (see todo)
- ensure there's always an active group. if users want to study all groups at
  once, they need to create a top level group. we do this because otherwise
  it's not clear which 'top level group' is active when everything is
  selected.

to do:

- new cards will appear in gid order, but the gid numbers don't reflect
  alphabetical sorting. we need to change the scheduling code so that it steps
  through each group in turn
- likewise for the learn queue
This commit is contained in:
Damien Elmes 2011-09-22 11:54:01 +09:00
parent dac46752ed
commit 024c42fef8
10 changed files with 76 additions and 109 deletions

View file

@ -16,9 +16,9 @@ NEW_TODAY_ORD = 0
NEW_TODAY_DUE = 1
# review card sort order
REV_CARDS_OLD_FIRST = 0
REV_CARDS_NEW_FIRST = 1
REV_CARDS_RANDOM = 2
REV_CARDS_RANDOM = 0
REV_CARDS_OLD_FIRST = 1
REV_CARDS_NEW_FIRST = 2
# removal types
REM_CARD = 0

View file

@ -66,11 +66,10 @@ class CramScheduler(Scheduler):
else:
maxlim = ""
self.newQueue = self.deck.db.list("""
select id from cards where queue = 2 and due >= %d
%s
%s order by %s limit %d""" % (self.today+1+self.min,
select id from cards where gid in %s and queue = 2 and due >= %d
%s order by %s limit %d""" % (self._groupLimit(),
self.today+1+self.min,
maxlim,
self._groupLimit(),
self.order,
self.reportLimit))
self.newCount = len(self.newQueue)

View file

@ -20,9 +20,9 @@ import anki.cards, anki.facts, anki.template, anki.cram, anki.find
defaultConf = {
# scheduling options
'activeGroups': [],
'activeGroups': [1],
'topGroup': 1,
'curGroup': None,
'curGroup': 1,
'revOrder': REV_CARDS_RANDOM,
# other config
'nextPos': 1,

View file

@ -114,11 +114,12 @@ class GroupManager(object):
g['conf'] = 1
while 1:
id = intTime(1000)
if str(id) in self.groups:
continue
if str(id) not in self.groups:
break
g['id'] = id
self.groups[str(id)] = g
self.save(g)
self.maybeAddToActive(g)
return int(id)
def rem(self, gid, cardsToo=False):
@ -179,9 +180,14 @@ class GroupManager(object):
def update(self, g):
"Add or update an existing group. Used for syncing and merging."
self.groups[str(g['id'])] = g
self.maybeAddToActive(g)
# mark registry changed, but don't bump mod time
self.save()
def maybeAddToActive(self, g):
# since order is important, we can't just append to the end
self.select(self.selected())
def updateConf(self, g):
self.gconf[str(g['id'])] = g
self.save()
@ -209,16 +215,11 @@ usn=?,mod=? where id in %s""" % ids2str(cids),
return self.deck.conf['activeGroups']
def selected(self):
"The currently selected gid, or None if whole collection."
"The currently selected gid."
return self.deck.conf['curGroup']
def select(self, gid):
"Select a new group. If gid is None, select whole collection."
if not gid:
self.deck.conf['topGroup'] = 1
self.deck.conf['curGroup'] = None
self.deck.conf['activeGroups'] = []
return
"Select a new branch."
# save the top level group
name = self.groups[str(gid)]['name']
self.deck.conf['topGroup'] = self.topFor(name)
@ -235,3 +236,9 @@ usn=?,mod=? where id in %s""" % ids2str(cids),
"The top level gid for NAME."
path = name.split("::")
return self.id(path[0])
def underSelected(self, name):
"True if name is under the selected group."
# if nothing is selected, always true
s = self.selected()
return name.startswith(self.get(s)['name'])

View file

@ -73,7 +73,7 @@ class Scheduler(object):
"Return counts over next DAYS. Includes today."
daysd = dict(self.deck.db.all("""
select due, count() from cards
where queue = 2 %s
where gid in %s and queue = 2
and due between ? and ?
group by due
order by due""" % self._groupLimit(),
@ -230,13 +230,13 @@ select 1 from cards where gid = ? and
else:
self.newCount = self.deck.db.scalar("""
select count() from (select id from cards where
queue = 0 %s limit %d)""" % (self._groupLimit(), lim))
gid in %s and queue = 0 limit %d)""" % (self._groupLimit(), lim))
def _resetNew(self):
lim = min(self.queueLimit, self.newCount)
self.newQueue = self.deck.db.all("""
select id, due from cards where
queue = 0 %s order by due limit %d""" % (self._groupLimit(),
gid in %s and queue = 0 limit %d""" % (self._groupLimit(),
lim))
self.newQueue.reverse()
self._updateNewCardRatio()
@ -289,14 +289,14 @@ queue = 0 %s order by due limit %d""" % (self._groupLimit(),
self._updateStatsDay("lrn")
self.lrnCount = self.deck.db.scalar("""
select count() from (select id from cards where
queue = 1 %s and due < ? limit %d)""" % (
gid in %s and queue = 1 and due < ? limit %d)""" % (
self._groupLimit(), self.reportLimit),
intTime() + self.deck.groups.top()['collapseTime'])
def _resetLrn(self):
self.lrnQueue = self.deck.db.all("""
select due, id from cards where
queue = 1 %s and due < :lim order by due
gid in %s and queue = 1 and due < :lim
limit %d""" % (self._groupLimit(), self.reportLimit), lim=self.dayCutoff)
def _getLrnCard(self, collapse=False):
@ -417,22 +417,19 @@ where queue = 1 and type = 2
self._updateStatsDay("rev")
self.revCount = self.deck.db.scalar("""
select count() from (select id from cards where
queue = 2 %s and due <= :lim limit %d)""" % (
gid in %s and queue = 2 and due <= :lim limit %d)""" % (
self._groupLimit(), self.reportLimit),
lim=self.today)
def _resetRev(self):
self.revQueue = self.deck.db.list("""
select id from cards where
queue = 2 %s and due <= :lim order by %s limit %d""" % (
gid in %s and queue = 2 and due <= :lim %s limit %d""" % (
self._groupLimit(), self._revOrder(), self.queueLimit),
lim=self.today)
if self.deck.conf['revOrder'] == REV_CARDS_RANDOM:
r = random.Random()
r.seed(self.today)
r.shuffle(self.revQueue)
else:
self.revQueue.reverse()
def _getRevCard(self):
if self._haveRevCards():
@ -446,9 +443,9 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
return self.revQueue
def _revOrder(self):
return ("ivl desc",
"ivl",
"due")[self.deck.conf['revOrder']]
if self.deck.conf['revOrder']:
return "order by %s" % ("ivl desc", "ivl")[self.deck.conf['revOrder']-1]
return ""
# Answering a review card
##########################################################################
@ -585,11 +582,7 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
return self.deck.groups.conf(card.gid)
def _groupLimit(self):
l = self.deck.groups.active()
if not l:
# everything
return ""
return " and gid in %s" % ids2str(l)
return ids2str(self.deck.groups.active())
# Daily cutoff
##########################################################################
@ -645,16 +638,15 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
def revTomorrow(self):
"Number of reviews due tomorrow."
return self.deck.db.scalar(
"select count() from cards where queue = 2 and due = ?"+
self._groupLimit(),
self.today+1)
"select count() from cards where gid in %s and queue = 2 and due = ?"%
self._groupLimit(), self.today+1)
def newTomorrow(self):
"Number of new cards tomorrow."
lim = self.deck.groups.top()['newPerDay']
return self.deck.db.scalar(
"select count() from (select id from cards where "
"queue = 0 %s limit %d)" % (self._groupLimit(), lim))
"gid in %s and queue = 0 limit %d)" % (self._groupLimit(), lim))
# Next time reports
##########################################################################
@ -732,31 +724,6 @@ queue = 2 %s and due <= :lim order by %s limit %d""" % (
"Number of cards answered today."
return sum(self.counts())
# Dynamic indices
##########################################################################
# fixme: warn user that the default is faster
def updateDynamicIndices(self):
"Call this after revOrder is changed. Bumps schema."
# determine required columns
required = []
if self.deck.conf['revOrder'] in (
REV_CARDS_OLD_FIRST, REV_CARDS_NEW_FIRST):
required.append("interval")
cols = ["queue", "due", "gid"] + required
# update if changed
if self.deck.db.scalar(
"select 1 from sqlite_master where name = 'ix_cards_multi'"):
rows = self.deck.db.all("pragma index_info('ix_cards_multi')")
else:
rows = None
if not (rows and cols == [r[2] for r in rows]):
self.deck.db.execute("drop index if exists ix_cards_multi")
self.deck.db.execute("create index ix_cards_multi on cards (%s)" %
", ".join(cols))
self.deck.db.execute("analyze")
self.deck.modSchema()
# Resetting
##########################################################################

View file

@ -97,11 +97,10 @@ class DeckStats(object):
self.width = 600
self.height = 200
def report(self, type=0, selective=True):
def report(self, type=0):
# 0=days, 1=weeks, 2=months
# period-dependent graphs
self.type = type
self.selective = selective
txt = self.css
txt += self.dueGraph()
txt += self.repsGraph()
@ -179,7 +178,7 @@ select (due-:today)/:chunk as day,
sum(case when ivl < 21 then 1 else 0 end), -- yng
sum(case when ivl >= 21 then 1 else 0 end) -- mtr
from cards
where queue = 2 %s
where gid in %s and queue = 2
%s
group by day order by day""" % (self._limit(), lim),
today=self.deck.sched.today,
@ -392,11 +391,11 @@ group by day order by day)""" % lim,
chunk = 30; lim = ""
data = [self.deck.db.all("""
select ivl / :chunk as grp, count() from cards
where queue = 2 %s %s
where gid in %s and queue = 2 %s
group by grp
order by grp""" % (self._limit(), lim), chunk=chunk)]
return data + list(self.deck.db.first("""
select count(), avg(ivl), max(ivl) from cards where queue = 2 %s""" %
select count(), avg(ivl), max(ivl) from cards where gid in %s and queue = 2""" %
self._limit()))
# Eases
@ -540,7 +539,7 @@ group by hour having count() > 30 order by hour""" % lim,
i = []
(c, f) = self.deck.db.first("""
select count(id), count(distinct fid) from cards
where 1 """ + self._limit())
where gid in %s """ % self._limit())
self._line(i, _("Total cards"), c)
self._line(i, _("Total facts"), f)
(low, avg, high) = self._factors()
@ -549,7 +548,7 @@ where 1 """ + self._limit())
self._line(i, _("Average ease factor"), "%d%%" % avg)
self._line(i, _("Highest ease factor"), "%d%%" % high)
min = self.deck.db.scalar(
"select min(id) from cards where 1 " + self._limit())
"select min(id) from cards where gid in %s " % self._limit())
if min:
self._line(i, _("First card created"), _("%s ago") % fmtTimeSpan(
time.time() - (min/1000)))
@ -580,7 +579,7 @@ select
min(factor) / 10.0,
avg(factor) / 10.0,
max(factor) / 10.0
from cards where queue = 2 %s""" % self._limit())
from cards where gid in %s and queue = 2""" % self._limit())
def _cards(self):
return self.deck.db.first("""
@ -589,7 +588,7 @@ sum(case when queue=2 and ivl >= 21 then 1 else 0 end), -- mtr
sum(case when queue=1 or (queue=2 and ivl < 21) then 1 else 0 end), -- yng/lrn
sum(case when queue=0 then 1 else 0 end), -- new
sum(case when queue=-1 then 1 else 0 end) -- susp
from cards where 1 %s""" % self._limit())
from cards where gid in %s""" % self._limit())
# Tools
######################################################################
@ -669,18 +668,11 @@ $(function () {
data=simplejson.dumps(data), conf=simplejson.dumps(conf)))
def _limit(self):
if self.selective:
return self.deck.sched._groupLimit()
else:
return ""
def _revlogLimit(self):
lim = self.deck.groups.active()
if self.selective and lim:
return ("cid in (select id from cards where gid in %s)" %
ids2str(lim))
else:
return ""
ids2str(self.deck.groups.active()))
def _title(self, title, subtitle=""):
return '<h1>%s</h1>%s' % (title, subtitle)

View file

@ -27,6 +27,7 @@ def Deck(path, queue=True, lock=True):
ver = _createDB(db)
else:
ver = _upgradeSchema(db)
db.execute("pragma temp_store = memory")
db.execute("pragma cache_size = 10000")
# add db to deck and do any remaining upgrades
deck = _Deck(db)
@ -156,6 +157,8 @@ def _updateIndices(db):
create index if not exists ix_facts_usn on facts (usn);
-- card spacing, etc
create index if not exists ix_cards_fid on cards (fid);
-- scheduling and group limiting
create index if not exists ix_cards_sched on cards (gid, queue, due);
-- revlog by card
create index if not exists ix_revlog_cid on revlog (cid);
-- revlog syncing
@ -543,7 +546,6 @@ update cards set due = cast(
deck.save()
# optimize and finish
deck.sched.updateDynamicIndices()
deck.db.commit()
deck.db.execute("vacuum")
deck.db.execute("analyze")

View file

@ -14,25 +14,26 @@ def test_basic():
assert len(deck.groups.groups) == 2
# should get the same id
assert deck.groups.id("new group") == parentId
# by default, everything should be shown
assert not deck.groups.selected()
assert not deck.groups.active()
# and the default group is used
assert deck.groups.top()['id'] == 1
# we can select the default explicitly
deck.groups.select(1)
# we start with the default group selected
assert deck.groups.selected() == 1
assert deck.groups.active() == [1]
assert deck.groups.top()['id'] == 1
# let's create a child and select that
# we can select a different group
deck.groups.select(parentId)
assert deck.groups.selected() == parentId
assert deck.groups.active() == [parentId]
assert deck.groups.top()['id'] == parentId
# let's create a child
childId = deck.groups.id("new group::child")
# it should have been added to the active list
assert deck.groups.selected() == parentId
assert deck.groups.active() == [parentId, childId]
assert deck.groups.top()['id'] == parentId
# we can select the child individually too
deck.groups.select(childId)
assert deck.groups.selected() == childId
assert deck.groups.active() == [childId]
assert deck.groups.top()['id'] == parentId
# if we select the parent, the child gets included
deck.groups.select(parentId)
assert sorted(deck.groups.active()) == [parentId, childId]
def test_remove():
deck = getEmptyDeck()

View file

@ -589,7 +589,7 @@ def test_ordcycle():
def test_counts_down():
d = getEmptyDeck()
# add a second group
grp = d.groups.id("new group")
grp = d.groups.id("Default::new group")
# for each card type
for type in range(3):
# and each of the groups
@ -609,7 +609,7 @@ def test_counts_down():
# with the default settings, there's no count limit
assert d.sched.counts() == (2,2,2)
# check limit to one group
d.groups.select(1)
d.groups.select(grp)
d.reset()
assert d.sched.counts() == (1,1,1)
@ -702,7 +702,6 @@ def test_groupCounts():
foobaz = f.gid = d.groups.id("foo::baz")
d.addFact(f)
d.reset()
assert d.sched.counts() == (3, 0, 1)
assert len(d.groups.groups) == 5
cnts = d.sched.groupCounts()
assert cnts[0] == ["Default", 1, 0, 1]

View file

@ -10,7 +10,7 @@ def test_op():
assert not d.undoName()
# let's adjust a study option
d.save("studyopts")
d.conf['revOrder'] = 5
d.conf['abc'] = 5
# it should be listed as undoable
assert d.undoName() == "studyopts"
# with about 5 minutes until it's clobbered
@ -18,7 +18,7 @@ def test_op():
# undoing should restore the old value
d.undo()
assert not d.undoName()
assert d.conf['revOrder'] != 5
assert 'abc' not in d.conf
# an (auto)save will clear the undo
d.save("foo")
assert d.undoName() == "foo"