Anki/anki/stats.py
Damien Elmes 024c42fef8 group scheduling refactor
see the following for background discussion:
http://groups.google.com/group/ankisrs-users/browse_thread/thread/4db5e82f7dff74fb

- change sched index to the more efficient gid, queue, due
- drop the dynamic index support. as there's no no q/a cache anymore, it's
  cheap enough to hit the cards table directly, and we can't use the index in
  its new form.
- drop order by clauses (see todo)
- ensure there's always an active group. if users want to study all groups at
  once, they need to create a top level group. we do this because otherwise
  the 'top level group' that's active when everything is selected is not
  clear.

to do:

- new cards will appear in gid order, but the gid numbers don't reflect
  alphabetical sorting. we need to change the scheduling code so that it steps
  through each group in turn
- likewise for the learn queue
2011-09-22 11:54:01 +09:00

694 lines
24 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import time, sys, os, datetime, simplejson
import anki.js
from anki.utils import fmtTimeSpan, fmtFloat, ids2str
from anki.consts import *
from anki.lang import _, ngettext
from anki.hooks import runFilter
# Card stats
##########################################################################
class CardStats(object):
def __init__(self, deck, card):
self.deck = deck
self.card = card
def report(self):
c = self.card
fmt = lambda x, **kwargs: fmtTimeSpan(x, short=True, **kwargs)
self.txt = "<table width=100%%>"
self.addLine(_("Added"), self.date(c.id/1000))
first = self.deck.db.scalar(
"select min(id) from revlog where cid = ?", c.id)
last = self.deck.db.scalar(
"select max(id) from revlog where cid = ?", c.id)
if first:
self.addLine(_("First Review"), self.date(first/1000))
self.addLine(_("Latest Review"), self.date(last/1000))
if c.queue in (1,2):
if c.queue == 2:
next = time.time()+((self.deck.sched.today - c.due)*86400)
else:
next = c.due
next = self.date(next)
self.addLine(_("Due"), next)
self.addLine(_("Interval"), fmt(c.ivl * 86400))
self.addLine(_("Ease"), "%d%%" % (c.factor/10.0))
(cnt, total) = self.deck.db.first(
"select count(), sum(time)/1000 from revlog where cid = :id",
id=c.id)
if cnt:
self.addLine(_("Average Time"), self.time(total / float(cnt)))
self.addLine(_("Total Time"), self.time(total))
elif c.queue == 0:
self.addLine(_("Position"), c.due)
self.addLine(_("Model"), c.model()['name'])
self.addLine(_("Template"), c.template()['name'])
self.addLine(_("Current Group"), self.deck.groups.name(c.gid))
self.addLine(_("Home Group"), self.deck.groups.name(c.fact().gid))
self.txt += "</table>"
return self.txt
def addLine(self, k, v):
self.txt += self.makeLine(k, v)
def makeLine(self, k, v):
txt = "<tr><td align=right style='padding-right: 3px;'>"
txt += "<b>%s</b></td><td>%s</td></tr>" % (k, v)
return txt
def date(self, tm):
return time.strftime("%Y-%m-%d", time.localtime(tm))
def time(self, tm):
str = ""
if tm >= 60:
str = fmtTimeSpan((tm/60)*60, short=True, point=-1, unit=1)
if tm%60 != 0 or not str:
str += fmtTimeSpan(tm%60, point=2 if not str else -1, short=True)
return str
# Deck stats
##########################################################################
colYoung = "#7c7"
colMature = "#070"
colCum = "rgba(0,0,0,0.9)"
colLearn = "#00F"
colRelearn = "#c00"
colCram = "#ff0"
colIvl = "#077"
colHour = "#777"
colTime = "#770"
colUnseen = "#000"
colSusp = "#ff0"
class DeckStats(object):
def __init__(self, deck):
self.deck = deck
self._stats = None
self.type = 0
self.width = 600
self.height = 200
def report(self, type=0):
# 0=days, 1=weeks, 2=months
# period-dependent graphs
self.type = type
txt = self.css
txt += self.dueGraph()
txt += self.repsGraph()
txt += self.ivlGraph()
# other graphs
txt += self.hourGraph()
txt += self.easeGraph()
txt += self.cardGraph()
return "<script>%s\n</script><center>%s</center>" % (anki.js.all, txt)
css = """
<style>
h1 { margin-bottom: 0; margin-top: 1em; }
body { font-size: 14px; }
table * { font-size: 14px; }
.pielabel { text-align:center; padding:0px; color:white; }
</style>
"""
# Due and cumulative due
######################################################################
def dueGraph(self):
if self.type == 0:
start = 0; end = 31; chunk = 1;
elif self.type == 1:
start = 0; end = 52; chunk = 7
elif self.type == 2:
start = 0; end = None; chunk = 30
d = self._due(start, end, chunk)
yng = []
mtr = []
tot = 0
totd = []
for day in d:
yng.append((day[0], day[1]))
mtr.append((day[0], day[2]))
tot += day[1]+day[2]
totd.append((day[0], tot))
data = [
dict(data=mtr, color=colMature, label=_("Mature")),
dict(data=yng, color=colYoung, label=_("Young")),
]
if len(totd) > 1:
data.append(
dict(data=totd, color=colCum, label=_("Cumulative"), yaxis=2,
bars={'show': False}, lines=dict(show=True), stack=False))
txt = self._title(
_("Forecast"),
_("The number of reviews due in the future."))
xaxis = dict(tickDecimals=0, min=-0.5)
if end is not None:
xaxis['max'] = end-0.5
txt += self._graph(id="due", data=data, conf=dict(
xaxis=xaxis,
yaxes=[dict(), dict(tickDecimals=0, position="right")]))
txt += self._dueInfo(tot, len(totd)*chunk)
return txt
def _dueInfo(self, tot, num):
i = []
self._line(i, _("Total"), _("%d reviews") % tot)
self._line(i, _("Average"), self._avgDay(
tot, num, _("reviews")))
return self._lineTbl(i)
def _due(self, start=None, end=None, chunk=1):
lim = ""
if start is not None:
lim += " and due-:today >= %d" % start
if end is not None:
lim += " and day < %d" % end
return self.deck.db.all("""
select (due-:today)/:chunk as day,
sum(case when ivl < 21 then 1 else 0 end), -- yng
sum(case when ivl >= 21 then 1 else 0 end) -- mtr
from cards
where gid in %s and queue = 2
%s
group by day order by day""" % (self._limit(), lim),
today=self.deck.sched.today,
chunk=chunk)
# Reps and time spent
######################################################################
def repsGraph(self):
if self.type == 0:
days = 30; chunk = 1
elif self.type == 1:
days = 52; chunk = 7
else:
days = None; chunk = 30
return self._repsGraph(self._done(days, chunk),
days,
_("Review Count"),
_("Review Time"))
def _repsGraph(self, data, days, reptitle, timetitle):
if not data:
return ""
d = data
conf = dict(
xaxis=dict(tickDecimals=0, max=0.5),
yaxes=[dict(), dict(position="right")])
if days is not None:
conf['xaxis']['min'] = -days-0.5
def plot(id, data, ylabel):
return self._graph(
id, data=data, conf=conf, ylabel=ylabel)
# reps
(repdata, repsum) = self._splitRepData(d, (
(3, colMature, _("Mature")),
(2, colYoung, _("Young")),
(4, colRelearn, _("Relearn")),
(1, colLearn, _("Learn")),
(5, colCram, _("Cram"))))
txt = self._title(
reptitle, _("The number of questions you have answered."))
txt += plot("reps", repdata, ylabel=_("Answers"))
(daysStud, fstDay) = self._daysStudied()
txt += self._ansInfo(repsum, daysStud, fstDay, _("reviews"))
# time
(timdata, timsum) = self._splitRepData(d, (
(8, colMature, _("Mature")),
(7, colYoung, _("Young")),
(9, colRelearn, _("Relearn")),
(6, colLearn, _("Learn")),
(10, colCram, _("Cram"))))
if self.type == 0:
t = _("Minutes")
convHours = False
else:
t = _("Hours")
convHours = True
txt += self._title(timetitle, _("The time taken to answer the questions."))
txt += plot("time", timdata, ylabel=t)
txt += self._ansInfo(timsum, daysStud, fstDay, _("minutes"), convHours)
return txt
def _ansInfo(self, totd, studied, first, unit, convHours=False):
if not totd:
return
tot = totd[-1][1]
period = self._periodDays()
if not period:
period = self.deck.sched.today - first + 1
i = []
self._line(i, _("Days studied"),
_("<b>%(pct)d%%</b> (%(x)s of %(y)s)") % dict(
x=studied, y=period, pct=studied/float(period)*100),
bold=False)
if convHours:
tunit = _("hours")
else:
tunit = unit
self._line(i, _("Total"), _("%(tot)s %(unit)s") % dict(
unit=tunit, tot=int(tot)))
if convHours:
# convert to minutes
tot *= 60
self._line(i, _("Average over studied"), self._avgDay(
tot, studied, unit))
self._line(i, _("If you studied every day"), self._avgDay(
tot, period, unit))
return self._lineTbl(i)
def _splitRepData(self, data, spec):
sep = {}
totcnt = {}
totd = {}
alltot = []
allcnt = 0
for (n, col, lab) in spec:
totcnt[n] = 0
totd[n] = []
sum = []
for row in data:
for (n, col, lab) in spec:
if n not in sep:
sep[n] = []
sep[n].append((row[0], row[n]))
totcnt[n] += row[n]
allcnt += row[n]
totd[n].append((row[0], totcnt[n]))
alltot.append((row[0], allcnt))
ret = []
for (n, col, lab) in spec:
if len(totd[n]) > 1 and totcnt[n]:
# bars
ret.append(dict(data=sep[n], color=col, label=lab))
# lines
ret.append(dict(
data=totd[n], color=col, label=None, yaxis=2,
bars={'show': False}, lines=dict(show=True), stack=-n))
return (ret, alltot)
def _done(self, num=7, chunk=1):
lims = []
if num is not None:
lims.append("id > %d" % (
(self.deck.sched.dayCutoff-(num*chunk*86400))*1000))
lim = self._revlogLimit()
if lim:
lims.append(lim)
if lims:
lim = "where " + " and ".join(lims)
else:
lim = ""
if self.type == 0:
tf = 60.0 # minutes
else:
tf = 3600.0 # hours
return self.deck.db.all("""
select
(cast((id/1000 - :cut) / 86400.0 as int))/:chunk as day,
sum(case when type = 0 then 1 else 0 end), -- lrn count
sum(case when type = 1 and lastIvl < 21 then 1 else 0 end), -- yng count
sum(case when type = 1 and lastIvl >= 21 then 1 else 0 end), -- mtr count
sum(case when type = 2 then 1 else 0 end), -- lapse count
sum(case when type = 3 then 1 else 0 end), -- cram count
sum(case when type = 0 then time/1000 else 0 end)/:tf, -- lrn time
-- yng + mtr time
sum(case when type = 1 and lastIvl < 21 then time/1000 else 0 end)/:tf,
sum(case when type = 1 and lastIvl >= 21 then time/1000 else 0 end)/:tf,
sum(case when type = 2 then time/1000 else 0 end)/:tf, -- lapse time
sum(case when type = 3 then time/1000 else 0 end)/:tf -- cram time
from revlog %s
group by day order by day""" % lim,
cut=self.deck.sched.dayCutoff,
tf=tf,
chunk=chunk)
def _daysStudied(self):
lims = []
num = self._periodDays()
if num:
lims.append(
"id > %d" %
((self.deck.sched.dayCutoff-(num*86400))*1000))
rlim = self._revlogLimit()
if rlim:
lims.append(rlim)
if lims:
lim = "where " + " and ".join(lims)
else:
lim = ""
return self.deck.db.first("""
select count(), abs(min(day)) from (select
(cast((id/1000 - :cut) / 86400.0 as int)+1) as day
from revlog %s
group by day order by day)""" % lim,
cut=self.deck.sched.dayCutoff)
# Intervals
######################################################################
def ivlGraph(self):
(ivls, all, avg, max) = self._ivls()
tot = 0
totd = []
if not ivls or not all:
return ""
for (grp, cnt) in ivls:
tot += cnt
totd.append((grp, tot/float(all)*100))
txt = self._title(_("Intervals"),
_("Delays until reviews are shown again."))
txt += self._graph(id="ivl", data=[
dict(data=ivls, color=colIvl, label=_("All Types")),
dict(data=totd, color=colCum, label=_("% Total"), yaxis=2,
bars={'show': False}, lines=dict(show=True), stack=False)
], conf=dict(
xaxis=dict(min=-0.5, max=ivls[-1][0]+0.5),
yaxes=[dict(), dict(position="right", max=105)]))
i = []
self._line(i, _("Average interval"), fmtTimeSpan(avg*86400))
self._line(i, _("Longest interval"), fmtTimeSpan(max*86400))
return txt + self._lineTbl(i)
def _ivls(self):
if self.type == 0:
chunk = 1; lim = " and grp <= 30"
elif self.type == 1:
chunk = 7; lim = " and grp <= 52"
else:
chunk = 30; lim = ""
data = [self.deck.db.all("""
select ivl / :chunk as grp, count() from cards
where gid in %s and queue = 2 %s
group by grp
order by grp""" % (self._limit(), lim), chunk=chunk)]
return data + list(self.deck.db.first("""
select count(), avg(ivl), max(ivl) from cards where gid in %s and queue = 2""" %
self._limit()))
# Eases
######################################################################
def easeGraph(self):
# 3 + 4 + 4 + spaces on sides and middle = 15
# yng starts at 1+3+1 = 5
# mtr starts at 5+4+1 = 10
d = {'lrn':[], 'yng':[], 'mtr':[]}
types = ("lrn", "yng", "mtr")
eases = self._eases()
for (type, ease, cnt) in eases:
if type == 1:
ease += 5
elif type == 2:
ease += 10
n = types[type]
d[n].append((ease, cnt))
ticks = [[1,1],[2,2],[3,3],
[6,1],[7,2],[8,3],[9,4],
[11, 1],[12,2],[13,3],[14,4]]
txt = self._title(_("Answer Buttons"),
_("The number of times you have pressed each button."))
txt += self._graph(id="ease", data=[
dict(data=d['lrn'], color=colLearn, label=_("Learning")),
dict(data=d['yng'], color=colYoung, label=_("Young")),
dict(data=d['mtr'], color=colMature, label=_("Mature")),
], type="barsLine", conf=dict(
xaxis=dict(ticks=ticks, min=0, max=15)),
ylabel=_("Answers"))
txt += self._easeInfo(eases)
return txt
def _easeInfo(self, eases):
types = {0: [0, 0], 1: [0, 0], 2: [0,0]}
for (type, ease, cnt) in eases:
if ease == 1:
types[type][0] += cnt
else:
types[type][1] += cnt
i = []
for type in range(3):
(bad, good) = types[type]
tot = bad + good
try:
pct = good / float(tot) * 100
except:
pct = 0
i.append(_(
"Correct: <b>%(pct)0.2f%%</b><br>(%(good)d of %(tot)d)") % dict(
pct=pct, good=good, tot=tot))
return ("""
<center><table width=%dpx><tr><td width=50></td><td align=center>""" % self.width +
"</td><td align=center>".join(i) +
"</td></tr></table></center>")
def _eases(self):
lim = self._revlogLimit()
if lim:
lim = "where " + lim
return self.deck.db.all("""
select (case
when type in (0,2) then 0
when lastIvl < 21 then 1
else 2 end) as thetype,
ease, count() from revlog %s
group by thetype, ease
order by thetype, ease""" % lim)
# Hourly retention
######################################################################
def hourGraph(self):
data = self._hourRet()
if not data:
return ""
shifted = []
trend = []
peak = 0
for d in data:
hour = (d[0] - 4) % 24
pct = d[1]
if pct > peak:
peak = pct
shifted.append((hour, pct))
shifted.sort()
for d in shifted:
hour = d[0]
pct = d[1]
if not trend:
trend.append((hour, pct))
else:
prev = trend[-1][1]
diff = pct-prev
diff /= 3.0
diff = round(diff, 1)
trend.append((hour, prev+diff))
txt = self._title(_("Hourly Retention"),
_("Review success rate for each hour of the day."))
txt += self._graph(id="hour", data=[
dict(data=shifted, color=colHour, label=_("% Failed")),
dict(data=trend, color=colCum, label=_("Trend"),
bars={'show': False}, lines=dict(show=True), stack=False)
], conf=dict(
xaxis=dict(ticks=[[0, _("4AM")], [6, _("10AM")],
[12, _("4PM")], [18, _("10PM")], [23, _("3AM")]]),
yaxis=dict(max=peak)),
ylabel=_("%Correct"))
return txt
def _hourRet(self):
lim = self._revlogLimit()
if lim:
lim = " and " + lim
sd = datetime.datetime.fromtimestamp(self.deck.crt)
return self.deck.db.all("""
select
23 - ((cast((:cut - id/1000) / 3600.0 as int)) %% 24) as hour,
sum(case when ease = 1 then 0 else 1 end) /
cast(count() as float) * 100,
count()
from revlog where type = 1 %s
group by hour having count() > 30 order by hour""" % lim,
cut=self.deck.sched.dayCutoff-(sd.hour*3600))
# Cards
######################################################################
def cardGraph(self):
# graph data
div = self._cards()
d = []
for c, (t, col) in enumerate((
(_("Mature"), colMature),
(_("Young+Learn"), colYoung),
(_("Unseen"), colUnseen),
(_("Suspended"), colSusp))):
d.append(dict(data=div[c], label=t, color=col))
# text data
i = []
(c, f) = self.deck.db.first("""
select count(id), count(distinct fid) from cards
where gid in %s """ % self._limit())
self._line(i, _("Total cards"), c)
self._line(i, _("Total facts"), f)
(low, avg, high) = self._factors()
if low:
self._line(i, _("Lowest ease factor"), "%d%%" % low)
self._line(i, _("Average ease factor"), "%d%%" % avg)
self._line(i, _("Highest ease factor"), "%d%%" % high)
min = self.deck.db.scalar(
"select min(id) from cards where gid in %s " % self._limit())
if min:
self._line(i, _("First card created"), _("%s ago") % fmtTimeSpan(
time.time() - (min/1000)))
info = "<table width=100%>" + "".join(i) + "</table><p>"
info += _('''\
A card's <i>ease factor</i> is the size of the next interval \
when you answer "good" on a review.''')
txt = self._title(_("Cards Types"),
_("The division of cards in your deck."))
txt += "<table width=%d><tr><td>%s</td><td>%s</td></table>" % (
self.width,
self._graph(id="cards", data=d, type="pie"),
info)
return txt
def _line(self, i, a, b, bold=True):
if bold:
i.append(("<tr><td width=200 align=right>%s:</td><td><b>%s</b></td></tr>") % (a,b))
else:
i.append(("<tr><td width=200 align=right>%s:</td><td>%s</td></tr>") % (a,b))
def _lineTbl(self, i):
return "<table width=400>" + "".join(i) + "</table>"
def _factors(self):
return self.deck.db.first("""
select
min(factor) / 10.0,
avg(factor) / 10.0,
max(factor) / 10.0
from cards where gid in %s and queue = 2""" % self._limit())
def _cards(self):
return self.deck.db.first("""
select
sum(case when queue=2 and ivl >= 21 then 1 else 0 end), -- mtr
sum(case when queue=1 or (queue=2 and ivl < 21) then 1 else 0 end), -- yng/lrn
sum(case when queue=0 then 1 else 0 end), -- new
sum(case when queue=-1 then 1 else 0 end) -- susp
from cards where gid in %s""" % self._limit())
# Tools
######################################################################
def _graph(self, id, data, conf={},
type="bars", ylabel=_("Cards"), timeTicks=True):
# display settings
if type == "pie":
conf['legend'] = {'container': "#%sLegend" % id, 'noColumns':2}
else:
conf['legend'] = {'container': "#%sLegend" % id, 'noColumns':10}
conf['series'] = dict(stack=True)
if not 'yaxis' in conf:
conf['yaxis'] = {}
conf['yaxis']['labelWidth'] = 40
if 'xaxis' not in conf:
conf['xaxis'] = {}
if timeTicks:
conf['timeTicks'] = (_("d"), _("w"), _("m"))[self.type]
# types
width = self.width
height = self.height
if type == "bars":
conf['series']['bars'] = dict(
show=True, barWidth=0.8, align="center", fill=0.7, lineWidth=0)
elif type == "barsLine":
conf['series']['bars'] = dict(
show=True, barWidth=0.8, align="center", fill=0.7, lineWidth=3)
elif type == "fill":
conf['series']['lines'] = dict(show=True, fill=True)
elif type == "pie":
width /= 2.3
height *= 1.5
ylabel = ""
conf['series']['pie'] = dict(
show=True,
radius=1,
stroke=dict(color="#fff", width=5),
label=dict(
show=True,
radius=0.8,
threshold=0.01,
background=dict(
opacity=0.5,
color="#000"
)))
#conf['legend'] = dict(show=False)
return (
"""
<table cellpadding=0 cellspacing=10>
<tr>
<td><div style="width: 10px; -webkit-transform: rotate(-90deg);
-moz-transform: rotate(-90deg);">%(ylab)s</div></td>
<td>
<center><div id=%(id)sLegend></div></center>
<div id="%(id)s" style="width:%(w)s; height:%(h)s;"></div>
</td></tr></table>
<script>
$(function () {
var conf = %(conf)s;
if (conf.timeTicks) {
conf.xaxis.tickFormatter = function (val, axis) {
return val.toFixed(0)+conf.timeTicks;
}
}
if (conf.series.pie) {
conf.series.pie.label.formatter = function(label, series){
return '<div class=pielabel>'+Math.round(series.percent)+'%%</div>';
};
}
$.plot($("#%(id)s"), %(data)s, conf);
});
</script>""" % dict(
id=id, w=width, h=height,
ylab=ylabel,
data=simplejson.dumps(data), conf=simplejson.dumps(conf)))
def _limit(self):
return self.deck.sched._groupLimit()
def _revlogLimit(self):
return ("cid in (select id from cards where gid in %s)" %
ids2str(self.deck.groups.active()))
def _title(self, title, subtitle=""):
return '<h1>%s</h1>%s' % (title, subtitle)
def _periodDays(self):
if self.type == 0:
return 30
elif self.type == 1:
return 365
else:
return None
def _avgDay(self, tot, num, unit):
vals = []
try:
vals.append(_("%d %s/day") % (tot/float(num), unit))
return ", ".join(vals)
except ZeroDivisionError:
return ""