Anki/anki/utils.py
Damien Elmes 55f4b9b7d0 favour integers, change due representation, fact&card ordering, more
- removed 'created' column from various tables. We don't care when things like
  models are created, and card creation time didn't reflect the actual time a
  card was created
- facts were previously ordered by their creation date. The code would
  manually set the creation time for subsequent facts on import by 0.0001
  seconds, and then card due times were set by adding the fact time to the
  ordinal number*0.000001. This was prone to error, and the number of zeros used
  was actually different in different parts of the code. Instead of this, we
  replace it with a 'pos' column on facts, which increments for each new fact.
- importing should add new facts with a higher pos, but concurrent updates in
  a synced deck can have multiple facts with the same pos

- due times are completely different now, and depend on the card type
- new cards have due=fact.pos or random(0, 10000)
- reviews have due set to an integer representing days since deck
  creation/download
- cards in the learn queue use an integer timestamp in seconds

- many columns like modified, lastSync, factor, interval, etc have been converted to
  integer columns. They are cheaper to store (large decks can save 10s of
  megabytes) and faster to search for.

- cards have their group assigned on fact creation. In the future we'll add a
  per-template option for a default group.

- switch to due/random order for the review queue on upgrade. Users can still
  switch to the old behaviour if they want, but many people don't care what
  it's set to, and due is considerably faster, which may result in a better
  user experience
2011-04-28 09:23:28 +09:00

299 lines
9.1 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import re, os, random, time, types, math, htmlentitydefs, subprocess
try:
import hashlib
md5 = hashlib.md5
except ImportError:
import md5
md5 = md5.new
from anki.db import *
from anki.lang import _, ngettext
import locale, sys
if sys.version_info[1] < 5:
def format_string(a, b):
return a % b
locale.format_string = format_string
# Time handling
##############################################################################
def intTime():
return int(time.time())
timeTable = {
"years": lambda n: ngettext("%s year", "%s years", n),
"months": lambda n: ngettext("%s month", "%s months", n),
"days": lambda n: ngettext("%s day", "%s days", n),
"hours": lambda n: ngettext("%s hour", "%s hours", n),
"minutes": lambda n: ngettext("%s minute", "%s minutes", n),
"seconds": lambda n: ngettext("%s second", "%s seconds", n),
}
afterTimeTable = {
"years": lambda n: ngettext("%s year<!--after-->", "%s years<!--after-->", n),
"months": lambda n: ngettext("%s month<!--after-->", "%s months<!--after-->", n),
"days": lambda n: ngettext("%s day<!--after-->", "%s days<!--after-->", n),
"hours": lambda n: ngettext("%s hour<!--after-->", "%s hours<!--after-->", n),
"minutes": lambda n: ngettext("%s minute<!--after-->", "%s minutes<!--after-->", n),
"seconds": lambda n: ngettext("%s second<!--after-->", "%s seconds<!--after-->", n),
}
shortTimeTable = {
"years": _("%sy"),
"months": _("%sm"),
"days": _("%sd"),
"hours": _("%sh"),
"minutes": _("%sm"),
"seconds": _("%ss"),
}
def fmtTimeSpan(time, pad=0, point=0, short=False, after=False):
"Return a string representing a time span (eg '2 days')."
(type, point) = optimalPeriod(time, point)
time = convertSecondsTo(time, type)
if not point:
time = math.floor(time)
if short:
fmt = shortTimeTable[type]
else:
if after:
fmt = afterTimeTable[type](_pluralCount(time, point))
else:
fmt = timeTable[type](_pluralCount(time, point))
timestr = "%(a)d.%(b)df" % {'a': pad, 'b': point}
return locale.format_string("%" + (fmt % timestr), time)
def optimalPeriod(time, point):
if abs(time) < 60:
type = "seconds"
point -= 1
elif abs(time) < 3599:
type = "minutes"
elif abs(time) < 60 * 60 * 24:
type = "hours"
elif abs(time) < 60 * 60 * 24 * 30:
type = "days"
elif abs(time) < 60 * 60 * 24 * 365:
type = "months"
point += 1
else:
type = "years"
point += 1
return (type, max(point, 0))
def convertSecondsTo(seconds, type):
if type == "seconds":
return seconds
elif type == "minutes":
return seconds / 60.0
elif type == "hours":
return seconds / 3600.0
elif type == "days":
return seconds / 86400.0
elif type == "months":
return seconds / 2592000.0
elif type == "years":
return seconds / 31536000.0
assert False
def _pluralCount(time, point):
if point:
return 2
return math.floor(time)
# Locale
##############################################################################
def fmtPercentage(float_value, point=1):
"Return float with percentage sign"
fmt = '%' + "0.%(b)df" % {'b': point}
return locale.format_string(fmt, float_value) + "%"
def fmtFloat(float_value, point=1):
"Return a string with decimal separator according to current locale"
fmt = '%' + "0.%(b)df" % {'b': point}
return locale.format_string(fmt, float_value)
# HTML
##############################################################################
def stripHTML(s):
s = re.sub("(?s)<style.*?>.*?</style>", "", s)
s = re.sub("(?s)<script.*?>.*?</script>", "", s)
s = re.sub("<.*?>", "", s)
s = entsToTxt(s)
return s
def stripHTMLAlt(s):
"Strip HTML, preserving img alt text."
s = re.sub("<img [^>]*alt=[\"']?([^\"'>]+)[\"']?[^>]*>", "\\1", s)
return stripHTML(s)
def stripHTMLMedia(s):
"Strip HTML but keep media filenames"
s = re.sub("<img src=[\"']?([^\"'>]+)[\"']? ?/?>", " \\1 ", s)
return stripHTML(s)
def tidyHTML(html):
"Remove cruft like body tags and return just the important part."
# contents of body - no head or html tags
html = re.sub(u".*<body.*?>(.*)</body></html>",
"\\1", html.replace("\n", u""))
# strip superfluous Qt formatting
html = re.sub(u"(?:-qt-table-type: root; )?"
"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
"margin-right:\d+px;(?: -qt-block-indent:0; "
"text-indent:0px;)?", u"", html)
html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
# strip leading space in style statements, and remove if no contents
html = re.sub(u'style=" ', u'style="', html)
html = re.sub(u' style=""', u"", html)
# convert P tags into SPAN and/or BR
html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
html = re.sub(u'<br>$', u'', html)
html = re.sub(u"^<table><tr><td style=\"border: none;\">(.*)<br></td></tr></table>$", u"\\1", html)
# this is being added by qt's html editor, and leads to unwanted spaces
html = re.sub(u"^<p dir='rtl'>(.*?)</p>$", u'\\1', html)
return html
def entsToTxt(html):
def fixup(m):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
return re.sub("&#?\w+;", fixup, html)
# IDs
##############################################################################
def genID(static=[]):
"Generate a random, unique 64bit ID."
# 23 bits of randomness, 41 bits of current time
# random rather than a counter to ensure efficient btree
t = long(time.time()*1000)
if not static:
static.extend([t, {}])
else:
if static[0] != t:
static[0] = t
static[1] = {}
while 1:
rand = random.getrandbits(23)
if rand not in static[1]:
static[1][rand] = True
break
x = rand << 41 | t
# turn into a signed long
if x >= 9223372036854775808L:
x -= 18446744073709551616L
return x
def hexifyID(id):
if id < 0:
id += 18446744073709551616L
return "%x" % id
def dehexifyID(id):
id = int(id, 16)
if id >= 9223372036854775808L:
id -= 18446744073709551616L
return id
def ids2str(ids):
"""Given a list of integers, return a string '(int1,int2,.)'
The caller is responsible for ensuring only integers are provided.
This is safe if you use sqlite primary key columns, which are guaranteed
to be integers."""
return "(%s)" % ",".join([str(i) for i in ids])
# Tags
##############################################################################
def parseTags(tags):
"Parse a string and return a list of tags."
tags = re.split(" |, ?", tags)
return [t.strip() for t in tags if t.strip()]
def joinTags(tags):
return u" ".join(tags)
def canonifyTags(tags):
"Strip leading/trailing/superfluous commas and duplicates."
tags = [t.lstrip(":") for t in set(parseTags(tags))]
return joinTags(sorted(tags))
def findTag(tag, tags):
"True if TAG is in TAGS. Ignore case."
if not isinstance(tags, types.ListType):
tags = parseTags(tags)
return tag.lower() in [t.lower() for t in tags]
def addTags(tagstr, tags):
"Add tags if they don't exist."
currentTags = parseTags(tags)
for tag in parseTags(tagstr):
if not findTag(tag, currentTags):
currentTags.append(tag)
return joinTags(currentTags)
def deleteTags(tagstr, tags):
"Delete tags if they don't exists."
currentTags = parseTags(tags)
for tag in parseTags(tagstr):
try:
currentTags.remove(tag)
except ValueError:
pass
return joinTags(currentTags)
# Misc
##############################################################################
def checksum(data):
return md5(data).hexdigest()
def fieldChecksum(data):
# 8 digit md5 hash of utf8 string, or empty string if empty value
if not data:
return ""
return checksum(data.encode("utf-8"))[:8]
def call(argv, wait=True, **kwargs):
try:
o = subprocess.Popen(argv, **kwargs)
except OSError:
# command not found
return -1
if wait:
while 1:
try:
ret = o.wait()
except OSError:
# interrupted system call
continue
break
else:
ret = 0
return ret