mirror of
https://github.com/ankitects/anki.git
synced 2025-11-15 17:17:11 -05:00
SQLAlchemy is a great tool, but it wasn't a great fit for Anki: - We often had to drop down to raw SQL for performance reasons. - The DB cursors and results were wrapped, which incurred a sizable performance hit due to introspection. Operations like fetching 50k records from a hot cache were taking more than twice as long to complete. - We take advantage of sqlite-specific features, so SQL language abstraction is useless to us. - The anki schema is quite small, so manually saving and loading objects is not a big burden. In the process of porting to DBAPI, I've refactored the database schema: - App configuration data that we don't need in joins or bulk updates has been moved into JSON objects. This simplifies serializing, and means we won't need DB schema changes to store extra options in the future. This change obsoletes the deckVars table. - Renamed tables: -- fieldModels -> fields -- cardModels -> templates -- fields -> fdata - a number of attribute names have been shortened Classes like Card, Fact & Model remain. They maintain a reference to the deck. To write their state to the DB, call .flush(). Objects no longer have their modification time manually updated. Instead, the modification time is updated when they are flushed. This also applies to the deck. Decks will now save on close, because various operations that were done at deck load will be moved into deck close instead. Operations like undoing buried card are cheap on a hot cache, but expensive on startup. Programmatically you can call .close(save=False) to avoid a save and a modification bump. This will be useful for generating due counts. Because of the new saving behaviour, the save and save as options will be removed from the GUI in the future. The q/a cache and field cache generating has been centralized. Facts will automatically rebuild the cache on flush; models can do so with model.updateCache(). Media handling has also been reworked. It has moved into a MediaRegistry object, which the deck holds. 
Refcounting has been dropped - it meant we had to compare old and new values every time facts or models were changed, and existed for the sole purpose of not showing errors on a missing media download. Instead we just call media.registerText(q+a) when it's updated. The download function will be expanded to ask the user if they want to continue after a certain number of files have failed to download, which should be an adequate alternative. And we now add the file into the media DB when it's copied to the media directory, not when the card is committed. This fixes duplicates a user would get if they added the same media to a card twice without adding the card. The old DeckStorage object had its upgrade code split in a previous commit; the opening and upgrading code has been merged back together, and put in a separate storage.py file. The correct way to open a deck now is import anki; d = anki.Deck(path). deck.getCard() -> deck.sched.getCard() same with answerCard deck.getCard(id) returns a Card object now. And the DB wrapper has had a few changes: - sql statements are a more standard DBAPI: - statement() -> execute() - statements() -> executemany() - called like execute(sql, 1, 2, 3) or execute(sql, a=1, b=2, c=3) - column0 -> list
302 lines
9.2 KiB
Python
302 lines
9.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright: Damien Elmes <anki@ichi2.net>
|
|
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
|
|
|
|
import re, os, random, time, types, math, htmlentitydefs, subprocess
|
|
|
|
try:
|
|
import hashlib
|
|
md5 = hashlib.md5
|
|
except ImportError:
|
|
import md5
|
|
md5 = md5.new
|
|
|
|
from anki.lang import _, ngettext
|
|
import locale, sys
|
|
|
|
# locale.format_string() is missing on very old interpreters.  Detect the
# missing attribute directly instead of looking at the minor version number
# alone: a bare "minor < 5" test also matches 3.0-3.4 and would replace the
# real, locale-aware implementation there.
if not hasattr(locale, "format_string"):
    def format_string(a, b):
        "Fallback: plain %-formatting of B with format A, no locale handling."
        return a % b
    locale.format_string = format_string
|
|
|
|
# Time handling
|
|
##############################################################################
|
|
|
|
def intTime():
    "Return the current Unix time truncated to whole seconds."
    now = time.time()
    return int(now)
|
|
|
|
# Long-form unit labels.  Each entry is a callable that takes the plural
# count and returns the ngettext() result for that count; the returned
# string still carries a "%s" placeholder for the number.
timeTable = dict(
    years=lambda count: ngettext("%s year", "%s years", count),
    months=lambda count: ngettext("%s month", "%s months", count),
    days=lambda count: ngettext("%s day", "%s days", count),
    hours=lambda count: ngettext("%s hour", "%s hours", count),
    minutes=lambda count: ngettext("%s minute", "%s minutes", count),
    seconds=lambda count: ngettext("%s second", "%s seconds", count),
)
|
|
|
|
# Same shape as the long-form table, but every msgid carries an
# "<!--after-->" marker so it is looked up as a separate translation.
afterTimeTable = dict(
    years=lambda count: ngettext("%s year<!--after-->", "%s years<!--after-->", count),
    months=lambda count: ngettext("%s month<!--after-->", "%s months<!--after-->", count),
    days=lambda count: ngettext("%s day<!--after-->", "%s days<!--after-->", count),
    hours=lambda count: ngettext("%s hour<!--after-->", "%s hours<!--after-->", count),
    minutes=lambda count: ngettext("%s minute<!--after-->", "%s minutes<!--after-->", count),
    seconds=lambda count: ngettext("%s second<!--after-->", "%s seconds<!--after-->", count),
)
|
|
|
|
# Abbreviated unit labels.  Unlike the long-form tables these are plain
# strings: _() is applied once, when this module is imported.
# NOTE(review): "months" and "minutes" share the msgid "%sm", so the two
# units look identical in short output - confirm whether that is intended.
shortTimeTable = dict(
    years=_("%sy"),
    months=_("%sm"),
    days=_("%sd"),
    hours=_("%sh"),
    minutes=_("%sm"),
    seconds=_("%ss"),
)
|
|
|
|
def fmtTimeSpan(time, pad=0, point=0, short=False, after=False):
    """Return a string representing a time span (eg '2 days').

    TIME is a number of seconds.  PAD is the minimum field width of the
    number and POINT the requested number of decimals (optimalPeriod() may
    adjust it).  SHORT selects the abbreviated labels; otherwise AFTER
    selects the "<!--after-->" variants of the long labels.
    """
    # choose the unit first; this also settles how many decimals to show
    unit, point = optimalPeriod(time, point)
    amount = convertSecondsTo(time, unit)
    if not point:
        amount = math.floor(amount)
    if short:
        label = shortTimeTable[unit]
    else:
        if after:
            table = afterTimeTable
        else:
            table = timeTable
        label = table[unit](_pluralCount(amount, point))
    # the label holds a "%s" placeholder: fill it with a float spec such as
    # "0.1f", then prefix "%" so the amount itself is formatted via locale
    spec = "%(a)d.%(b)df" % {'a': pad, 'b': point}
    return locale.format_string("%" + (label % spec), amount)
|
|
|
|
def optimalPeriod(time, point):
    """Choose the unit best suited to display TIME seconds.

    Returns a tuple (unit, decimals) where unit is one of "seconds",
    "minutes", "hours", "days", "months" or "years".  POINT is the requested
    number of decimals: it is reduced by one for seconds, increased by one
    for months and years, and never returned below zero.  The sign of TIME
    is ignored when choosing the unit.
    """
    magnitude = abs(time)
    if magnitude < 60:
        unit = "seconds"
        point -= 1
    elif magnitude < 60 * 60:
        # a full hour is 3600s; a lower cut-off would show the last second
        # of the hour as a fraction of an hour ("0 hours" once floored)
        unit = "minutes"
    elif magnitude < 60 * 60 * 24:
        unit = "hours"
    elif magnitude < 60 * 60 * 24 * 30:
        unit = "days"
    elif magnitude < 60 * 60 * 24 * 365:
        unit = "months"
        point += 1
    else:
        unit = "years"
        point += 1
    return (unit, max(point, 0))
|
|
|
|
def convertSecondsTo(seconds, type):
    """Convert SECONDS into the unit named by TYPE.

    "seconds" returns the value untouched; every other unit returns a float.
    A month is taken as 30 days and a year as 365 days.  An unknown TYPE
    trips the assertion at the end.
    """
    if type == "seconds":
        # already in the requested unit
        return seconds
    for name, length in (
        ("minutes", 60.0),
        ("hours", 3600.0),
        ("days", 86400.0),
        ("months", 2592000.0),
        ("years", 31536000.0),
    ):
        if type == name:
            return seconds / length
    assert False
|
|
|
|
def _pluralCount(time, point):
|
|
if point:
|
|
return 2
|
|
return math.floor(time)
|
|
|
|
# Locale
|
|
##############################################################################
|
|
|
|
def fmtPercentage(float_value, point=1):
    "Return FLOAT_VALUE with POINT decimals and a trailing percent sign."
    # builds a spec such as "%0.1f"
    spec = "%%0.%df" % (point,)
    formatted = locale.format_string(spec, float_value)
    return formatted + "%"
|
|
|
|
def fmtFloat(float_value, point=1):
    "Return FLOAT_VALUE with POINT decimals, formatted via the locale module."
    # builds a spec such as "%0.1f"
    spec = "%%0.%df" % (point,)
    return locale.format_string(spec, float_value)
|
|
|
|
# HTML
|
|
##############################################################################
|
|
|
|
def stripHTML(s):
    """Return S with all markup removed and entities converted to text.

    <style> and <script> elements are dropped together with their contents;
    every other tag is removed but its contents are kept.
    """
    # (?s) lets "." cross newlines, (?i) accepts upper-case tag names
    s = re.sub("(?si)<style.*?>.*?</style>", "", s)
    s = re.sub("(?si)<script.*?>.*?</script>", "", s)
    # tags may be wrapped over several lines, so "." must match "\n" here too
    s = re.sub("(?s)<.*?>", "", s)
    return entsToTxt(s)
|
|
|
|
def stripHTMLAlt(s):
    "Strip HTML, preserving img alt text."
    # swap each <img ... alt=...> tag for its alt text, then strip the rest
    altPattern = "<img [^>]*alt=[\"']?([^\"'>]+)[\"']?[^>]*>"
    withAlt = re.sub(altPattern, "\\1", s)
    return stripHTML(withAlt)
|
|
|
|
def stripHTMLMedia(s):
    "Strip HTML but keep media filenames"
    # swap each <img src=...> tag for its space-padded filename, then strip
    srcPattern = "<img src=[\"']?([^\"'>]+)[\"']? ?/?>"
    withNames = re.sub(srcPattern, " \\1 ", s)
    return stripHTML(withNames)
|
|
|
|
def tidyHTML(html):
    "Remove cruft like body tags and return just the important part."
    # newlines are dropped first so the single-line patterns below apply
    html = html.replace("\n", u"")
    # (pattern, replacement) pairs, applied strictly in this order
    cleanups = (
        # contents of body - no head or html tags
        (u".*<body.*?>(.*)</body></html>", "\\1"),
        # strip superfluous Qt formatting
        (u"(?:-qt-table-type: root; )?"
         "margin-top:\\d+px; margin-bottom:\\d+px; margin-left:\\d+px; "
         "margin-right:\\d+px;(?: -qt-block-indent:0; "
         "text-indent:0px;)?", u""),
        (u"-qt-paragraph-type:empty;", u""),
        # strip leading space in style attributes, and remove if no contents
        (u'style=" ', u'style="'),
        (u' style=""', u""),
        # convert P tags into SPAN and/or BR
        (u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>'),
        (u'<p>(.*?)</p>', u'\\1<br>'),
        (u'<br>$', u''),
        (u"^<table><tr><td style=\"border: none;\">(.*)<br></td></tr></table>$",
         u"\\1"),
        # this is being added by qt's html editor, and leads to unwanted spaces
        (u"^<p dir='rtl'>(.*?)</p>$", u'\\1'),
    )
    for pattern, replacement in cleanups:
        html = re.sub(pattern, replacement, html)
    return html
|
|
|
|
def entsToTxt(html):
    """Replace HTML character references and named entities in HTML.

    Handles decimal (&#65;) and hexadecimal (&#x41;) references as well as
    named entities (&amp;).  Anything that cannot be converted is left in
    the text exactly as it was found.
    """
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return unichr(int(text[3:-1], 16))
                else:
                    return unichr(int(text[2:-1]))
            except (ValueError, OverflowError):
                # not a number, or a code point unichr() cannot represent;
                # very large numbers raise OverflowError, not ValueError
                pass
        else:
            # named entity
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text # leave as is
    return re.sub(r"&#?\w+;", fixup, html)
|
|
|
|
# IDs
|
|
##############################################################################
|
|
|
|
def genID(static=[]):
    """Generate a random, unique 64bit ID.

    The result is a signed 64 bit integer.  STATIC is not meant to be passed
    by callers: the mutable default is deliberate and keeps
    [last timestamp, random values used during that timestamp] alive between
    calls, so two IDs generated within the same millisecond never collide.
    """
    # 23 bits of randomness, 41 bits of current time
    # random rather than a counter to ensure efficient btree
    t = int(time.time()*1000)
    if not static:
        static.extend([t, {}])
    elif static[0] != t:
        # a new millisecond: forget the random values used in the old one
        static[0] = t
        static[1] = {}
    used = static[1]
    while 1:
        rand = random.getrandbits(23)
        if rand not in used:
            used[rand] = True
            break
    x = rand << 41 | t
    # turn into a signed 64 bit value; plain ints are used for the limits
    # because integers are promoted automatically and need no "L" suffix
    if x >= 1 << 63:
        x -= 1 << 64
    return x
|
|
|
|
def hexifyID(id):
    "Return signed 64 bit ID as lower-case hex of its unsigned equivalent."
    if id < 0:
        # shift negative values into the upper half of the unsigned range
        id += 1 << 64
    return "%x" % id
|
|
|
|
def dehexifyID(id):
    "Parse hex string ID and return it as a signed 64 bit integer."
    id = int(id, 16)
    if id >= 1 << 63:
        # values in the upper half of the unsigned range are negative IDs
        id -= 1 << 64
    return id
|
|
|
|
def ids2str(ids):
    """Given a list of integers, return a string '(int1,int2,...)'

    The caller is responsible for ensuring only integers are provided.
    This is safe if you use sqlite primary key columns, which are guaranteed
    to be integers."""
    joined = ",".join(map(str, ids))
    return "(" + joined + ")"
|
|
|
|
# Tags
|
|
##############################################################################
|
|
|
|
def parseTags(tags):
    "Parse a string and return a list of tags."
    found = []
    for piece in tags.split(" "):
        # runs of spaces produce empty pieces; skip those
        if piece:
            found.append(piece)
    return found
|
|
|
|
def joinTags(tags):
    "Join tags into a single string, with leading and trailing spaces."
    if tags:
        body = u" ".join(tags)
        return u" %s " % body
    # nothing to join: no padding either
    return u""
|
|
|
|
def canonifyTags(tags):
    """Return TAGS in canonical form.

    Leading colons are stripped from each tag, duplicates and empty tags are
    dropped, and the remainder is sorted and joined with joinTags().
    """
    unique = set()
    for t in parseTags(tags):
        # strip before deduplicating, so ":a" and "a" collapse into one tag
        t = t.lstrip(":")
        if t:
            unique.add(t)
    return joinTags(sorted(unique))
|
|
|
|
def hasTag(tag, tags):
    "True if TAG is in TAGS. Ignore case."
    wanted = tag.lower()
    for candidate in tags:
        if candidate.lower() == wanted:
            return True
    return False
|
|
|
|
def addTags(addtags, tags):
    "Add tags if they don't exist."
    merged = parseTags(tags)
    for candidate in parseTags(addtags):
        # membership is checked against the growing list, so a tag repeated
        # within ADDTAGS is only added once
        if hasTag(candidate, merged):
            continue
        merged.append(candidate)
    return joinTags(merged)
|
|
|
|
def deleteTags(deltags, tags):
    "Delete tags if they exist. Ignore case."
    remaining = parseTags(tags)
    for doomed in parseTags(deltags):
        needle = doomed.lower()
        # keep every tag that does not match, ignoring case
        remaining = [t for t in remaining if t.lower() != needle]
    return joinTags(remaining)
|
|
|
|
# Misc
|
|
##############################################################################
|
|
|
|
def checksum(data):
    "Return the md5 digest of DATA as a hex string."
    digest = md5(data)
    return digest.hexdigest()
|
|
|
|
def fieldChecksum(data):
    "Return the first 8 hex digits of the md5 of DATA, or '' if DATA is empty."
    # 8 digit md5 hash of utf8 string, or empty string if empty value
    if data:
        encoded = data.encode("utf-8")
        return checksum(encoded)[:8]
    return ""
|
|
|
|
def call(argv, wait=True, **kwargs):
    """Run ARGV via subprocess.Popen and return an exit status.

    Extra keyword arguments are passed straight to Popen.  Returns -1 if the
    process could not be started or could not be waited for, 0 if WAIT is
    false (the process is left running), and the process's return code
    otherwise.
    """
    import errno
    try:
        o = subprocess.Popen(argv, **kwargs)
    except OSError:
        # command not found
        return -1
    if not wait:
        return 0
    while 1:
        try:
            return o.wait()
        except OSError as e:
            if e.errno == errno.EINTR:
                # interrupted system call; try again
                continue
            # any other error (e.g. ECHILD) will not go away on retry, so
            # report failure instead of spinning forever
            return -1
|