mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 06:22:22 -04:00

- removed 'created' column from various tables. We don't care when things like models are created, and card creation time didn't reflect the actual time a card was created - facts were previously ordered by their creation date. The code would manually set the creation time for subsequent facts on import by 0.0001 seconds, and then card due times were set by adding the fact time to the ordinal number*0.000001. This was prone to error, and the number of zeros used was actually different in different parts of the code. Instead of this, we replace it with a 'pos' column on facts, which increments for each new fact. - importing should add new facts with a higher pos, but concurrent updates in a synced deck can have multiple facts with the same pos - due times are completely different now, and depend on the card type - new cards have due=fact.pos or random(0, 10000) - reviews have due set to an integer representing days since deck creation/download - cards in the learn queue use an integer timestamp in seconds - many columns like modified, lastSync, factor, interval, etc have been converted to integer columns. They are cheaper to store (large decks can save 10s of megabytes) and faster to search for. - cards have their group assigned on fact creation. In the future we'll add a per-template option for a default group. - switch to due/random order for the review queue on upgrade. Users can still switch to the old behaviour if they want, but many people don't care what it's set to, and due is considerably faster, which may result in a better user experience
299 lines
9.1 KiB
Python
299 lines
9.1 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright: Damien Elmes <anki@ichi2.net>
|
|
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
|
|
|
|
import re, os, random, time, types, math, htmlentitydefs, subprocess
|
|
|
|
try:
|
|
import hashlib
|
|
md5 = hashlib.md5
|
|
except ImportError:
|
|
import md5
|
|
md5 = md5.new
|
|
|
|
from anki.db import *
|
|
from anki.lang import _, ngettext
|
|
import locale, sys
|
|
|
|
# locale.format_string() is missing on very old interpreters. Detect the
# missing function directly rather than comparing only the minor version
# number, which would also match unrelated releases such as 3.0-3.4 and
# silently replace the real, locale-aware implementation.
if not hasattr(locale, "format_string"):
    def format_string(a, b):
        "Fallback: plain %-formatting of B into A (not locale-aware)."
        return a % b
    locale.format_string = format_string
|
|
|
|
# Time handling
|
|
##############################################################################
|
|
|
|
def intTime():
    "Return the current Unix time truncated to whole seconds."
    now = time.time()
    return int(now)
|
|
|
|
# Long-form unit labels. Each entry maps a unit name to a function that
# takes a count and returns the format string chosen by ngettext for that
# count (singular or plural wording); "%s" is the slot for the number.
timeTable = dict(
    years=lambda n: ngettext("%s year", "%s years", n),
    months=lambda n: ngettext("%s month", "%s months", n),
    days=lambda n: ngettext("%s day", "%s days", n),
    hours=lambda n: ngettext("%s hour", "%s hours", n),
    minutes=lambda n: ngettext("%s minute", "%s minutes", n),
    seconds=lambda n: ngettext("%s second", "%s seconds", n),
)
|
|
|
|
# Same shape as timeTable, but every message carries an "<!--after-->"
# marker. Presumably this gives translators a separate msgid for spans
# used in an "after ..." context - confirm against the translation files.
afterTimeTable = dict(
    years=lambda n: ngettext("%s year<!--after-->", "%s years<!--after-->", n),
    months=lambda n: ngettext("%s month<!--after-->", "%s months<!--after-->", n),
    days=lambda n: ngettext("%s day<!--after-->", "%s days<!--after-->", n),
    hours=lambda n: ngettext("%s hour<!--after-->", "%s hours<!--after-->", n),
    minutes=lambda n: ngettext("%s minute<!--after-->", "%s minutes<!--after-->", n),
    seconds=lambda n: ngettext("%s second<!--after-->", "%s seconds<!--after-->", n),
)
|
|
|
|
# Abbreviated unit labels. Unlike the long tables these are plain format
# strings, translated once when the module is imported.
# NOTE(review): "months" and "minutes" share the msgid "%sm", so the two
# units look the same in short form unless a translation tells them apart.
shortTimeTable = dict(
    years=_("%sy"),
    months=_("%sm"),
    days=_("%sd"),
    hours=_("%sh"),
    minutes=_("%sm"),
    seconds=_("%ss"),
)
|
|
|
|
def fmtTimeSpan(time, pad=0, point=0, short=False, after=False):
    """Return a string representing a time span (eg '2 days').

    time is the span in seconds. The unit is picked by optimalPeriod(),
    which may also adjust point, the number of decimal places. pad is the
    minimum field width of the number. short selects the abbreviated
    labels from shortTimeTable; otherwise after chooses between
    afterTimeTable and timeTable.
    """
    unit, point = optimalPeriod(time, point)
    amount = convertSecondsTo(time, unit)
    if not point:
        # no decimals will be shown, so truncate instead of rounding
        amount = math.floor(amount)
    if short:
        template = shortTimeTable[unit]
    elif after:
        template = afterTimeTable[unit](_pluralCount(amount, point))
    else:
        template = timeTable[unit](_pluralCount(amount, point))
    # the label's "%s" slot receives a float spec such as "0.1f"; the
    # leading "%" added below turns it into a complete format directive
    numberSpec = "%(a)d.%(b)df" % {'a': pad, 'b': point}
    return locale.format_string("%" + (template % numberSpec), amount)
|
|
|
|
def optimalPeriod(time, point):
    """Pick the display unit for a span of TIME seconds.

    Returns a (unit, point) tuple, where unit is one of "seconds",
    "minutes", "hours", "days", "months" or "years" and point is the
    requested number of decimals after adjustment: one fewer for seconds,
    one more for months and years, never below zero.
    """
    magnitude = abs(time)
    # (exclusive upper bound in seconds, unit, change to point)
    # NOTE(review): the minutes bound is 3599 rather than 3600, possibly
    # so that values just under an hour are not shown as "60 minutes".
    limits = (
        (60, "seconds", -1),
        (3599, "minutes", 0),
        (60 * 60 * 24, "hours", 0),
        (60 * 60 * 24 * 30, "days", 0),
        (60 * 60 * 24 * 365, "months", 1),
    )
    unit, shift = "years", 1
    for limit, name, adjust in limits:
        if magnitude < limit:
            unit, shift = name, adjust
            break
    return (unit, max(point + shift, 0))
|
|
|
|
def convertSecondsTo(seconds, type):
    """Convert SECONDS into the unit named by TYPE.

    "seconds" returns the value unchanged; every other unit divides by a
    float, using 30-day months and 365-day years. An unknown unit trips
    an assertion.
    """
    if type == "seconds":
        return seconds
    divisors = {
        "minutes": 60.0,
        "hours": 3600.0,
        "days": 86400.0,
        "months": 2592000.0,
        "years": 31536000.0,
    }
    if type in divisors:
        return seconds / divisors[type]
    assert False
|
|
|
|
def _pluralCount(time, point):
    "Return the count used to select a plural form for TIME."
    if not point:
        return math.floor(time)
    # a value shown with decimals always takes the plural wording
    return 2
|
|
|
|
# Locale
|
|
##############################################################################
|
|
|
|
def fmtPercentage(float_value, point=1):
    "Return float_value with POINT decimals, followed by a percent sign."
    spec = '%' + "0.%(b)df" % {'b': point}
    number = locale.format_string(spec, float_value)
    return number + "%"
|
|
|
|
def fmtFloat(float_value, point=1):
    "Return float_value with POINT decimals, formatted via the locale module."
    spec = '%' + "0.%(b)df" % {'b': point}
    formatted = locale.format_string(spec, float_value)
    return formatted
|
|
|
|
# HTML
|
|
##############################################################################
|
|
|
|
def stripHTML(s):
    "Remove style/script elements and all tags from S, then decode entities."
    patterns = (
        # whole <style> and <script> elements go, contents included
        "(?s)<style.*?>.*?</style>",
        "(?s)<script.*?>.*?</script>",
        # then every remaining tag
        "<.*?>",
    )
    for pattern in patterns:
        s = re.sub(pattern, "", s)
    return entsToTxt(s)
|
|
|
|
def stripHTMLAlt(s):
    "Strip HTML, preserving img alt text."
    # swap each <img ... alt=...> tag for its alt text before stripping
    altPattern = "<img [^>]*alt=[\"']?([^\"'>]+)[\"']?[^>]*>"
    withAltText = re.sub(altPattern, "\\1", s)
    return stripHTML(withAltText)
|
|
|
|
def stripHTMLMedia(s):
    "Strip HTML but keep media filenames"
    # swap each <img src=...> tag for its filename, padded with spaces
    imgPattern = "<img src=[\"']?([^\"'>]+)[\"']? ?/?>"
    withFilenames = re.sub(imgPattern, " \\1 ", s)
    return stripHTML(withFilenames)
|
|
|
|
def tidyHTML(html):
    """Remove cruft like body tags and return just the important part.

    Newlines are dropped, only the contents of <body> are kept when a body
    element is present, Qt-specific style properties are removed, and <p>
    elements become <span>/<br> markup. Returns the cleaned-up string.
    """
    # contents of body - no head or html tags
    html = re.sub(u".*<body.*?>(.*)</body></html>",
                  "\\1", html.replace("\n", u""))
    # strip superfluous Qt formatting
    # (fragments containing \d are raw literals so the backslash reaches
    # the regex engine without relying on an invalid string escape)
    html = re.sub(u"(?:-qt-table-type: root; )?"
                  r"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
                  r"margin-right:\d+px;(?: -qt-block-indent:0; "
                  "text-indent:0px;)?", u"", html)
    html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
    # strip leading space in style statements, and remove if no contents
    html = re.sub(u'style=" ', u'style="', html)
    html = re.sub(u' style=""', u"", html)
    # convert P tags into SPAN and/or BR
    html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
    html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
    # the final paragraph should not leave a trailing line break behind
    html = re.sub(u'<br>$', u'', html)
    # unwrap a borderless single-cell table that wraps the whole content
    html = re.sub(u"^<table><tr><td style=\"border: none;\">(.*)<br></td></tr></table>$", u"\\1", html)
    # this is being added by qt's html editor, and leads to unwanted spaces
    html = re.sub(u"^<p dir='rtl'>(.*?)</p>$", u'\\1', html)
    return html
|
|
|
|
def entsToTxt(html):
    """Replace HTML character references and named entities with text.

    Handles decimal (&#65;), hexadecimal (&#x41; or &#X41;) and named
    (&amp;) forms. Anything that cannot be decoded - an unknown name,
    malformed digits or a code point out of range - is left as is.
    """
    # resolve the entity table and code point -> character function
    # locally so this works with either standard library layout
    try:
        from htmlentitydefs import name2codepoint
        toChar = unichr
    except ImportError:
        from html.entities import name2codepoint
        toChar = chr

    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[2:3] in ("x", "X"):
                    return toChar(int(text[3:-1], 16))
                return toChar(int(text[2:-1]))
            except (ValueError, OverflowError):
                # bad digits raise ValueError; absurdly large numbers can
                # raise OverflowError instead
                pass
        else:
            # named entity
            try:
                return toChar(name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text # leave as is
    return re.sub(r"&#?\w+;", fixup, html)
|
|
|
|
# IDs
|
|
##############################################################################
|
|
|
|
def genID(static=[]):
    """Generate a random, unique 64bit ID.

    Returns a signed 64-bit integer. STATIC is deliberately a mutable
    default: it persists between calls and holds the last millisecond
    timestamp plus the random values already issued for it, so two calls
    in the same millisecond never return the same ID. Callers are not
    expected to pass it.
    """
    # 23 bits of randomness, 41 bits of current time
    # random rather than a counter to ensure efficient btree
    t = int(time.time()*1000)
    if not static:
        static.extend([t, {}])
    elif static[0] != t:
        # a new millisecond: forget the values used in the previous one
        static[0] = t
        static[1] = {}
    used = static[1]
    while 1:
        rand = random.getrandbits(23)
        if rand not in used:
            used[rand] = True
            break
    # keep the timestamp inside its 41 bits so it can never spill into
    # the random field once the clock value outgrows them
    x = rand << 41 | (t & ((1 << 41) - 1))
    # turn into a signed long
    if x >= (1 << 63):
        x -= (1 << 64)
    return x
|
|
|
|
def hexifyID(id):
    "Return ID as lowercase hex; negative IDs are shifted up by 2**64 first."
    unsigned = id
    if unsigned < 0:
        unsigned += 1 << 64
    return "%x" % unsigned
|
|
|
|
def dehexifyID(id):
    "Parse hex string ID; values of 2**63 and above map to negatives."
    value = int(id, 16)
    if value < (1 << 63):
        return value
    return value - (1 << 64)
|
|
|
|
def ids2str(ids):
    """Given a list of integers, return a string '(int1,int2,...)'

    No validation or escaping is done here: the caller must make sure
    that only integers are passed in. Values read from sqlite primary key
    columns qualify, as those are guaranteed to be integers."""
    joined = ",".join(map(str, ids))
    return "(%s)" % joined
|
|
|
|
# Tags
|
|
##############################################################################
|
|
|
|
def parseTags(tags):
    "Parse a string and return a list of tags."
    parsed = []
    # tags are separated by a space, or by a comma with an optional space
    for piece in re.split(" |, ?", tags):
        piece = piece.strip()
        if piece:
            parsed.append(piece)
    return parsed
|
|
|
|
def joinTags(tags):
    "Join a sequence of tags into a single space-separated string."
    separator = u" "
    return separator.join(tags)
|
|
|
|
def canonifyTags(tags):
    """Strip leading/trailing/superfluous commas and duplicates.

    TAGS is a tag string. Leading colons are removed from every tag, and
    the result is the sorted, de-duplicated tags joined by spaces.
    """
    unique = set()
    for tag in parseTags(tags):
        # strip before de-duplicating, so that ":foo" and "foo" collapse
        # into a single entry instead of producing "foo foo"
        tag = tag.lstrip(":")
        # a tag made only of colons has nothing left; skip it rather
        # than emit an empty tag
        if tag:
            unique.add(tag)
    return joinTags(sorted(unique))
|
|
|
|
def findTag(tag, tags):
    "True if TAG is in TAGS. Ignore case."
    # TAGS may be a ready-made list or a tag string that needs parsing
    if not isinstance(tags, list):
        tags = parseTags(tags)
    wanted = tag.lower()
    for candidate in tags:
        if candidate.lower() == wanted:
            return True
    return False
|
|
|
|
def addTags(tagstr, tags):
    "Add tags if they don't exist."
    merged = parseTags(tags)
    for candidate in parseTags(tagstr):
        # findTag ignores case, so "Foo" is not added next to "foo"
        if findTag(candidate, merged):
            continue
        merged.append(candidate)
    return joinTags(merged)
|
|
|
|
def deleteTags(tagstr, tags):
    "Delete tags if they exist."
    remaining = parseTags(tags)
    for unwanted in parseTags(tagstr):
        # exact, case-sensitive match; only the first occurrence goes
        if unwanted in remaining:
            remaining.remove(unwanted)
    return joinTags(remaining)
|
|
|
|
# Misc
|
|
##############################################################################
|
|
|
|
def checksum(data):
|
|
return md5(data).hexdigest()
|
|
|
|
def fieldChecksum(data):
    "Return a short checksum of DATA, or an empty string for an empty value."
    # 8 digit md5 hash of utf8 string, or empty string if empty value
    if data:
        encoded = data.encode("utf-8")
        return checksum(encoded)[:8]
    return ""
|
|
|
|
def call(argv, wait=True, **kwargs):
    """Run ARGV as a subprocess.

    Extra keyword arguments are passed on to subprocess.Popen. Returns -1
    if the process could not be started, 0 straight away when WAIT is
    false, and the process's return code otherwise. An error while
    waiting, other than an interrupted system call, is re-raised.
    """
    import errno
    try:
        o = subprocess.Popen(argv, **kwargs)
    except OSError:
        # command not found
        return -1
    if not wait:
        return 0
    while 1:
        try:
            return o.wait()
        except OSError:
            # retry only an interrupted system call; retrying any other
            # error would spin forever if it keeps recurring
            if sys.exc_info()[1].errno != errno.EINTR:
                raise
|