Anki/anki/utils.py
2008-12-23 18:38:54 +09:00

227 lines
6.5 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
"""\
Miscellaneous utilities
==============================
"""
__docformat__ = 'restructuredtext'
import re, os, random, time, types
try:
import hashlib
md5 = hashlib.md5
except ImportError:
import md5
md5 = md5.new
from anki.db import *
from anki.lang import _, ngettext
# Time handling
##############################################################################
timeTable = {
"years": lambda n: ngettext("%s year", "%s years", n),
"months": lambda n: ngettext("%s month", "%s months", n),
"days": lambda n: ngettext("%s day", "%s days", n),
"hours": lambda n: ngettext("%s hour", "%s hours", n),
"minutes": lambda n: ngettext("%s minute", "%s minutes", n),
"seconds": lambda n: ngettext("%s second", "%s seconds", n),
}
shortTimeTable = {
"years": "%sy",
"months": "%sm",
"days": "%sd",
"hours": "%sh",
"minutes": "%sm",
"seconds": "%ss",
}
def fmtTimeSpan(time, pad=0, point=0, short=False):
"Return a string representing a time span (eg '2 days')."
(type, point) = optimalPeriod(time, point)
time = convertSecondsTo(time, type)
if short:
fmt = shortTimeTable[type]
else:
fmt = timeTable[type](_pluralCount(round(time, point)))
timestr = "%(a)d.%(b)df" % {'a': pad, 'b': point}
return ("%" + (fmt % timestr)) % time
def fmtTimeSpanPair(time1, time2, short=False):
(type, point) = optimalPeriod(time1, 0)
time1 = convertSecondsTo(time1, type)
time2 = convertSecondsTo(time2, type)
# a pair is always should always be read as plural
if short:
fmt = shortTimeTable[type]
else:
fmt = timeTable[type](2)
timestr = "%(a)d.%(b)df" % {'a': 0, 'b': point}
finalstr = "%s-%s" % (
('%' + timestr) % time1,
('%' + timestr) % time2)
return fmt % finalstr
def optimalPeriod(time, point):
if abs(time) < 60:
type = "seconds"
point -= 1
elif abs(time) < 3599:
type = "minutes"
elif abs(time) < 60 * 60 * 24:
type = "hours"
elif abs(time) < 60 * 60 * 24 * 30:
type = "days"
elif abs(time) < 60 * 60 * 24 * 365:
type = "months"
point += 1
else:
type = "years"
point += 1
return (type, max(point, 0))
def convertSecondsTo(seconds, type):
if type == "seconds":
return seconds
elif type == "minutes":
return seconds / 60.0
elif type == "hours":
return seconds / 3600.0
elif type == "days":
return seconds / 86400.0
elif type == "months":
return seconds / 2592000.0
elif type == "years":
return seconds / 31536000.0
assert False
def _pluralCount(time):
if round(time, 1) == 1:
return 1
return 2
# HTML
##############################################################################
def stripHTML(s):
s = re.sub("<.*?>", "", s)
s = s.replace("&lt;", "<")
s = s.replace("&gt;", ">")
return s
def tidyHTML(html):
"Remove cruft like body tags and return just the important part."
# contents of body - no head or html tags
html = re.sub(u".*<body.*?>(.*)</body></html>",
"\\1", html.replace("\n", u""))
# strip superfluous Qt formatting
html = re.sub(u"margin-top:\d+px; margin-bottom:\d+px; margin-left:\d+px; "
"margin-right:\d+px; -qt-block-indent:0; "
"text-indent:0px;", u"", html)
html = re.sub(u"-qt-paragraph-type:empty;", u"", html)
# strip leading space in style statements, and remove if no contents
html = re.sub(u'style=" ', u'style="', html)
html = re.sub(u' style=""', u"", html)
# convert P tags into SPAN and/or BR
html = re.sub(u'<p( style=.+?)>(.*?)</p>', u'<span\\1>\\2</span><br>', html)
html = re.sub(u'<p>(.*?)</p>', u'\\1<br>', html)
html = re.sub(u'<br>$', u'', html)
# remove leading or trailing whitespace
html = re.sub(u'^ +', u'', html)
html = re.sub(u' +$', u'', html)
return html
# IDs
##############################################################################
def genID(static=[]):
"Generate a random, unique 64bit ID."
# 23 bits of randomness, 41 bits of current time
# random rather than a counter to ensure efficient btree
t = long(time.time()*1000)
if not static:
static.extend([t, {}])
else:
if static[0] != t:
static[0] = t
static[1] = {}
while 1:
rand = random.getrandbits(23)
if rand not in static[1]:
static[1][rand] = True
break
x = rand << 41 | t
# turn into a signed long
if x >= 9223372036854775808L:
x -= 18446744073709551616L
return x
def hexifyID(id):
if id < 0:
id += 18446744073709551616L
return "%x" % id
def dehexifyID(id):
id = int(id, 16)
if id >= 9223372036854775808L:
id -= 18446744073709551616L
return id
def ids2str(ids):
"""Given a list of integers, return a string '(int1,int2,.)'
The caller is responsible for ensuring only integers are provided.
This is safe if you use sqlite primary key columns, which are guaranteed
to be integers."""
return "(%s)" % ",".join([str(i) for i in ids])
# Tags
##############################################################################
def parseTags(tags):
"Parse a string and return a list of tags."
tags = tags.split(",")
tags = [tag.strip() for tag in tags if tag.strip()]
return tags
def joinTags(tags):
return u", ".join(tags)
def canonifyTags(tags):
"Strip leading/trailing/superfluous commas and duplicates."
return joinTags(sorted(set(parseTags(tags))))
def findTag(tag, tags):
"True if TAG is in TAGS. Ignore case."
if not isinstance(tags, types.ListType):
tags = parseTags(tags)
return tag.lower() in [t.lower() for t in tags]
def addTags(tagstr, tags):
"Add tags if they don't exist."
currentTags = parseTags(tags)
for tag in parseTags(tagstr):
if not findTag(tag, currentTags):
currentTags.append(tag)
return joinTags(currentTags)
def deleteTags(tagstr, tags):
"Delete tags if they don't exists."
currentTags = parseTags(tags)
for tag in parseTags(tagstr):
try:
currentTags.remove(tag)
except ValueError:
pass
return joinTags(currentTags)
# Misc
##############################################################################
def checksum(data):
return md5(data).hexdigest()