mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 06:22:22 -04:00

The tags tables were initially added to speed up the loading of the browser by speeding up two operations: gathering a list of all tags to show in the dropdown box, and finding cards with a given tag. The former functionality is provided by the tags table, and the latter functionality by the cardTags table. Selective study is handled by groups now, which perform better since they don't require a join or subselect, and can be embedded in the index. So the only remaining benefit of cardTags is for the browser. Performance testing indicates that cardTags is not saving us a large amount. It only takes us 30ms to search a 50k card table for matches with a hot cache. On a cold cache it means the facts table has to be loaded into memory, which roughly doubles the load time with the default settings (we need to load the cards table too, as we're sorting the cards), but that startup time was necessary with certain settings in the past too (sorting by fact created for example). With groups implemented, the cost of maintaining a cache just for initial browser load time is hard to justify. Other changes: - the tags table has any missing tags added to it when facts are added/edited. This means old tags will stick around even when no cards reference them, but is much cheaper than reference counting or a separate table, and simplifies updates and syncing. - the tags table has a modified field now so we can sync it instead of having to scan all facts coming across in a sync - priority field removed - we no longer put model names or card templates into the tags table. There were two reasons we did this in the past: so we could cram/selective study them, and for plugins. Selective study uses groups now, and plugins can check the model's name instead (and most already do). This also does away with the somewhat confusing behaviour of names also being tags. - facts have their tags as _tags now. 
You can get a list with tags(), but editing operations should use add/deleteTags() instead of manually editing the string.
303 lines
9.2 KiB
Python
303 lines
9.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright: Damien Elmes <anki@ichi2.net>
|
|
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
|
|
|
|
import re, os, random, time, types, math, htmlentitydefs, subprocess
|
|
|
|
try:
|
|
import hashlib
|
|
md5 = hashlib.md5
|
|
except ImportError:
|
|
import md5
|
|
md5 = md5.new
|
|
|
|
from anki.db import *
|
|
from anki.lang import _, ngettext
|
|
import locale, sys
|
|
|
|
# Interpreters older than 2.5 have no locale.format_string(), so install a
# plain (locale-unaware) %-formatting stand-in for the helpers in this file.
# Compare (major, minor) rather than the minor number alone, so that a
# release such as 3.4 does not have its real implementation replaced.
if sys.version_info[:2] < (2, 5):
    def format_string(a, b):
        return a % b
    locale.format_string = format_string
|
|
|
|
# Time handling
|
|
##############################################################################
|
|
|
|
def intTime():
    "Return the current Unix time truncated to whole seconds."
    now = time.time()
    return int(now)
|
|
|
|
# Long-form unit labels. Each value takes the count that selects the plural
# form and returns whatever ngettext() picks; the %s in the result is later
# filled with a numeric format spec by fmtTimeSpan().
timeTable = dict(
    years=lambda count: ngettext("%s year", "%s years", count),
    months=lambda count: ngettext("%s month", "%s months", count),
    days=lambda count: ngettext("%s day", "%s days", count),
    hours=lambda count: ngettext("%s hour", "%s hours", count),
    minutes=lambda count: ngettext("%s minute", "%s minutes", count),
    seconds=lambda count: ngettext("%s second", "%s seconds", count),
)
|
|
|
|
# Same shape as timeTable, but every message carries an <!--after--> marker.
# NOTE(review): presumably the marker gives translators a distinct message
# for "after <span>" phrasing - confirm against the translation catalogs.
afterTimeTable = dict(
    years=lambda count: ngettext(
        "%s year<!--after-->", "%s years<!--after-->", count),
    months=lambda count: ngettext(
        "%s month<!--after-->", "%s months<!--after-->", count),
    days=lambda count: ngettext(
        "%s day<!--after-->", "%s days<!--after-->", count),
    hours=lambda count: ngettext(
        "%s hour<!--after-->", "%s hours<!--after-->", count),
    minutes=lambda count: ngettext(
        "%s minute<!--after-->", "%s minutes<!--after-->", count),
    seconds=lambda count: ngettext(
        "%s second<!--after-->", "%s seconds<!--after-->", count),
)
|
|
|
|
# Abbreviated unit labels. Unlike the long tables these are plain strings,
# looked up through _() once, when this module is loaded.
# NOTE(review): months and minutes share the same "%sm" source string.
shortTimeTable = dict(
    years=_("%sy"),
    months=_("%sm"),
    days=_("%sd"),
    hours=_("%sh"),
    minutes=_("%sm"),
    seconds=_("%ss"),
)
|
|
|
|
def fmtTimeSpan(time, pad=0, point=0, short=False, after=False):
    """Return a string representing a time span (eg '2 days').

    TIME is a span in seconds. PAD is the minimum field width and POINT the
    requested number of decimals (optimalPeriod() may adjust it for the
    chosen unit). SHORT selects the abbreviated labels; otherwise AFTER
    selects the afterTimeTable wording over the plain timeTable wording.
    """
    unit, point = optimalPeriod(time, point)
    amount = convertSecondsTo(time, unit)
    if not point:
        # no decimals will be shown, so drop the fraction up front
        amount = math.floor(amount)
    if short:
        template = shortTimeTable[unit]
    elif after:
        template = afterTimeTable[unit](_pluralCount(amount, point))
    else:
        template = timeTable[unit](_pluralCount(amount, point))
    # the label's %s receives a float spec such as "0.1f"; prefixing "%"
    # then turns the whole label into a format string for the amount
    numberSpec = "%(a)d.%(b)df" % {'a': pad, 'b': point}
    return locale.format_string("%" + (template % numberSpec), amount)
|
|
|
|
def optimalPeriod(time, point):
    """Pick the display unit for a span of TIME seconds.

    Returns a (unit, point) tuple. UNIT is one of "seconds", "minutes",
    "hours", "days", "months" or "years", chosen from the magnitude of TIME
    (the sign is ignored). POINT is the requested number of decimals,
    reduced by one for seconds, increased by one for months and years, and
    never negative.
    """
    span = abs(time)
    if span < 60:
        unit = "seconds"
        point -= 1
    elif span < 60 * 60:
        # a full hour is the boundary; stopping at 3599 pushed that one
        # second into "hours", where it was displayed as 0 hours
        unit = "minutes"
    elif span < 60 * 60 * 24:
        unit = "hours"
    elif span < 60 * 60 * 24 * 30:
        unit = "days"
    elif span < 60 * 60 * 24 * 365:
        unit = "months"
        point += 1
    else:
        unit = "years"
        point += 1
    return (unit, max(point, 0))
|
|
|
|
def convertSecondsTo(seconds, type):
    """Express SECONDS in the unit named by TYPE.

    "seconds" returns the value untouched; every other unit returns a float
    (a month counts as 30 days and a year as 365). An unrecognised unit
    trips the assertion at the end.
    """
    if type == "seconds":
        return seconds
    secondsPerUnit = {
        "minutes": 60.0,
        "hours": 3600.0,
        "days": 86400.0,
        "months": 2592000.0,
        "years": 31536000.0,
    }
    if type in secondsPerUnit:
        return seconds / secondsPerUnit[type]
    assert False
|
|
|
|
def _pluralCount(time, point):
|
|
if point:
|
|
return 2
|
|
return math.floor(time)
|
|
|
|
# Locale
|
|
##############################################################################
|
|
|
|
def fmtPercentage(float_value, point=1):
    "Format FLOAT_VALUE with POINT decimals and append a percent sign."
    precision = "0.%(b)df" % {'b': point}
    number = locale.format_string('%' + precision, float_value)
    return number + "%"
|
|
|
|
def fmtFloat(float_value, point=1):
    "Format FLOAT_VALUE with POINT decimals via locale.format_string()."
    precision = "0.%(b)df" % {'b': point}
    return locale.format_string('%' + precision, float_value)
|
|
|
|
# HTML
|
|
##############################################################################
|
|
|
|
def stripHTML(s):
    "Drop style/script blocks and all remaining tags, then decode entities."
    # order matters: whole style/script elements must go before the generic
    # tag pattern, or their contents would be left behind as text
    patterns = (
        "(?s)<style.*?>.*?</style>",
        "(?s)<script.*?>.*?</script>",
        "<.*?>",
    )
    for pattern in patterns:
        s = re.sub(pattern, "", s)
    return entsToTxt(s)
|
|
|
|
def stripHTMLAlt(s):
    "Strip HTML, preserving img alt text."
    # swap each <img ... alt=...> tag for its alt text before stripping
    imgWithAlt = "<img [^>]*alt=[\"']?([^\"'>]+)[\"']?[^>]*>"
    withAltText = re.sub(imgWithAlt, "\\1", s)
    return stripHTML(withAltText)
|
|
|
|
def stripHTMLMedia(s):
    "Strip HTML but keep media filenames"
    # swap each <img src=...> tag for its filename, padded with spaces
    imgWithSrc = "<img src=[\"']?([^\"'>]+)[\"']? ?/?>"
    withFilenames = re.sub(imgWithSrc, " \\1 ", s)
    return stripHTML(withFilenames)
|
|
|
|
def tidyHTML(html):
    "Remove cruft like body tags and return just the important part."
    html = html.replace("\n", u"")
    # (pattern, replacement) pairs, applied strictly in this order
    rewrites = (
        # contents of body - no head or html tags
        (u".*<body.*?>(.*)</body></html>", "\\1"),
        # strip superfluous Qt formatting
        (u"(?:-qt-table-type: root; )?"
         "margin-top:\\d+px; margin-bottom:\\d+px; margin-left:\\d+px; "
         "margin-right:\\d+px;(?: -qt-block-indent:0; "
         "text-indent:0px;)?", u""),
        (u"-qt-paragraph-type:empty;", u""),
        # strip leading space in style statements, and remove if no contents
        (u'style=" ', u'style="'),
        (u' style=""', u""),
        # convert P tags into SPAN and/or BR
        (u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>'),
        (u'<p>(.*?)</p>', u'\\1<br>'),
        (u'<br>$', u''),
        (u"^<table><tr><td style=\"border: none;\">(.*)<br></td></tr></table>$",
         u"\\1"),
        # this is being added by qt's html editor, and leads to unwanted
        # spaces
        (u"^<p dir='rtl'>(.*?)</p>$", u'\\1'),
    )
    for pattern, replacement in rewrites:
        html = re.sub(pattern, replacement, html)
    return html
|
|
|
|
def entsToTxt(html):
    """Replace HTML character references and named entities with text.

    Handles decimal (&#65;) and hexadecimal (&#x41; / &#X41;) references as
    well as the names listed in htmlentitydefs.name2codepoint. Anything
    that cannot be decoded - an unknown name, malformed digits, or a code
    point unichr() rejects - is left in the output exactly as written.
    """
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3].lower() == "&#x":
                    return unichr(int(text[3:-1], 16))
                return unichr(int(text[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: bad digits or a code point out of range;
                # OverflowError: a number too large for unichr() to accept
                pass
        else:
            # named entity
            try:
                return unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text # leave as is
    return re.sub(r"&#?\w+;", fixup, html)
|
|
|
|
# IDs
|
|
##############################################################################
|
|
|
|
def genID(static=[]):
    "Generate a random, unique 64bit ID."
    # The mutable default is deliberate: it survives between calls and holds
    # [millisecond timestamp, {random values already issued in that ms}].
    #
    # 23 bits of randomness, 41 bits of current time
    # random rather than a counter to ensure efficient btree
    now = long(time.time()*1000)
    if not static or static[0] != now:
        # first call, or the clock moved on: start a fresh record
        static[:] = [now, {}]
    issued = static[1]
    rand = random.getrandbits(23)
    while rand in issued:
        rand = random.getrandbits(23)
    issued[rand] = True
    x = rand << 41 | now
    # turn into a signed long
    if x >= (1 << 63):
        x -= (1 << 64)
    return x
|
|
|
|
def hexifyID(id):
    "Return ID as lowercase hex, mapping negatives to unsigned 64-bit."
    unsigned = id
    if unsigned < 0:
        unsigned += (1 << 64)
    return "%x" % unsigned
|
|
|
|
def dehexifyID(id):
    "Parse the hex string ID into a signed 64-bit integer."
    value = int(id, 16)
    if value >= (1 << 63):
        # top bit set: wrap round to the negative range
        value -= (1 << 64)
    return value
|
|
|
|
def ids2str(ids):
    """Render IDS as a parenthesised, comma separated string: '(1,2,3)'.

    No validation is performed: the caller must make sure that only integers
    are passed in. Values read from sqlite primary key columns are safe,
    since those are guaranteed to be integers.
    """
    joined = ",".join(map(str, ids))
    return "(%s)" % joined
|
|
|
|
# Tags
|
|
##############################################################################
|
|
|
|
def parseTags(tags):
    "Split a space separated tag string into a list, skipping empty items."
    found = []
    for piece in tags.split(" "):
        if piece:
            found.append(piece)
    return found
|
|
|
|
def joinTags(tags):
    "Join tags with spaces, padded by one space at each end; u'' if none."
    if tags:
        return u" %s " % u" ".join(tags)
    return u""
|
|
|
|
def canonifyTags(tags):
    """Return the tag string TAGS in canonical form.

    Leading colons are stripped from every tag, duplicates and tags left
    empty by the stripping are dropped, and what remains is sorted and
    joined with joinTags().
    """
    # strip first, then de-duplicate, so ":foo" and "foo" collapse into one
    unique = set([t.lstrip(":") for t in parseTags(tags)])
    # a tag made only of colons strips down to nothing
    unique.discard("")
    return joinTags(sorted(unique))
|
|
|
|
def findTag(tag, tags):
    "True if TAG is in TAGS. Ignore case."
    wanted = tag.lower()
    for candidate in tags:
        if candidate.lower() == wanted:
            return True
    return False
|
|
|
|
def addTags(addtags, tags):
    "Append each tag of ADDTAGS that TAGS lacks (ignoring case)."
    merged = parseTags(tags)
    for candidate in parseTags(addtags):
        if findTag(candidate, merged):
            continue
        merged.append(candidate)
    return joinTags(merged)
|
|
|
|
def deleteTags(deltags, tags):
    "Remove every tag of DELTAGS from TAGS, ignoring case."
    unwanted = set([t.lower() for t in parseTags(deltags)])
    kept = [t for t in parseTags(tags) if t.lower() not in unwanted]
    return joinTags(kept)
|
|
|
|
# Misc
|
|
##############################################################################
|
|
|
|
def checksum(data):
    "Return the hexadecimal MD5 digest of DATA."
    digest = md5(data)
    return digest.hexdigest()
|
|
|
|
def fieldChecksum(data):
    "Return a short checksum of a field value, or '' for an empty value."
    # 8 digit md5 hash of utf8 string, or empty string if empty value
    if data:
        encoded = data.encode("utf-8")
        return checksum(encoded)[:8]
    return ""
|
|
|
|
def call(argv, wait=True, **kwargs):
    """Run ARGV as a subprocess, passing KWARGS on to subprocess.Popen.

    Returns the process's exit status when WAIT is true, or 0 straight away
    when it is false. Returns -1 if the process could not be started, or if
    waiting for it failed for any reason other than an interrupted system
    call.
    """
    import errno
    try:
        proc = subprocess.Popen(argv, **kwargs)
    except OSError:
        # command not found
        return -1
    if not wait:
        return 0
    while 1:
        try:
            return proc.wait()
        except OSError:
            # sys.exc_info() instead of "except ... as" keeps this readable
            # by the old interpreters the rest of the file caters for
            if sys.exc_info()[1].errno != errno.EINTR:
                # a permanent failure (e.g. ECHILD) would otherwise be
                # retried forever; report it like a failed launch
                return -1
            # interrupted system call: try again
|