Anki/anki/media.py
Damien Elmes 55f4b9b7d0 favour integers, change due representation, fact&card ordering, more
- removed 'created' column from various tables. We don't care when things like
  models are created, and card creation time didn't reflect the actual time a
  card was created
- facts were previously ordered by their creation date. On import, the code
  would manually offset the creation time of each subsequent fact by 0.0001
  seconds, and card due times were then derived by adding the card's ordinal
  number * 0.000001 to the fact time. This was error-prone, and the number of
  zeros used actually differed between different parts of the code. Instead,
  we replace it with a 'pos' column on facts, which increments for each new
  fact (see the sketch below).
- importing should add new facts with a higher pos, but concurrent updates in
  a synced deck can leave multiple facts with the same pos
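  A minimal sketch of the idea (illustrative only; nextFactPos() and the
  query are assumptions, not the actual Anki code):

      def nextFactPos(deck):
          # each new fact takes the next position; imports continue from the
          # current maximum, so imported facts sort after existing ones
          return deck.db.scalar("select coalesce(max(pos), 0) + 1 from facts")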

- due times are completely different now, and depend on the card type (see
  the sketch below)
- new cards have due=fact.pos, or random(0, 10000) when random order is used
- reviews have due set to an integer representing days since deck
  creation/download
- cards in the learn queue use an integer timestamp in seconds
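  A rough illustration of the three representations (assumed helper names,
  not the real scheduler code):

      import random, time

      def newCardDue(fact, randomOrder=False):
          # new cards: the fact's position, or a random slot in [0, 10000]
          if randomOrder:
              return random.randint(0, 10000)
          return fact.pos

      def reviewDue(deckCreated, daysFromNow):
          # reviews: whole days since the deck was created/downloaded
          return int((time.time() - deckCreated) / 86400) + daysFromNow

      def learnDue(delaySeconds):
          # learn queue: an absolute integer timestamp in seconds
          return int(time.time()) + delaySeconds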

- many columns like modified, lastSync, factor, interval, etc. have been
  converted to integer columns. They are cheaper to store (large decks can
  save tens of megabytes) and faster to search.

- cards have their group assigned on fact creation. In the future we'll add a
  per-template option for a default group.

- switch to due/random order for the review queue on upgrade. Users can still
  switch back to the old behaviour if they want, but many people don't care
  what it's set to, and due order is considerably faster, which may result in
  a better user experience.
2011-04-28 09:23:28 +09:00

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html

import os, shutil, re, urllib2, time, tempfile, unicodedata, urllib
from anki.db import *
from anki.utils import checksum, genID, intTime
from anki.lang import _

# other code depends on this order, so don't reorder
regexps = ("(?i)(\[sound:([^]]+)\])",
           "(?i)(<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>)")

# Tables
##########################################################################

mediaTable = Table(
    'media', metadata,
    Column('id', Integer, primary_key=True, nullable=False),
    Column('filename', UnicodeText, nullable=False, unique=True),
    Column('refcnt', Integer, nullable=False),
    Column('modified', Integer, nullable=False),
    Column('chksum', UnicodeText, nullable=False, default=u""))

# File handling
##########################################################################

def copyToMedia(deck, path):
    """Copy PATH to MEDIADIR, and return new filename.

    If a file with the same md5sum exists in the DB, return that.
    If a file with the same name exists, return a unique name.
    This does not modify the media table."""
    # see if have duplicate contents
    newpath = deck.db.scalar(
        "select filename from media where chksum = :cs",
        cs=checksum(open(path, "rb").read()))
    # check if this filename already exists
    if not newpath:
        base = os.path.basename(path)
        mdir = deck.mediaDir(create=True)
        newpath = uniquePath(mdir, base)
        shutil.copy2(path, newpath)
    return os.path.basename(newpath)

def uniquePath(dir, base):
    # remove any dangerous characters
    base = re.sub(r"[][<>:/\\&]", "", base)
    # find a unique name
    (root, ext) = os.path.splitext(base)
    def repl(match):
        n = int(match.group(1))
        return " (%d)" % (n+1)
    while True:
        path = os.path.join(dir, root + ext)
        if not os.path.exists(path):
            break
        reg = " \((\d+)\)$"
        if not re.search(reg, root):
            root = root + " (1)"
        else:
            root = re.sub(reg, repl, root)
    return path

# DB routines
##########################################################################

def updateMediaCount(deck, file, count=1):
    mdir = deck.mediaDir()
    if deck.db.scalar(
        "select 1 from media where filename = :file", file=file):
        deck.db.statement(
            "update media set refcnt = refcnt + :c, modified = :t where filename = :file",
            file=file, c=count, t=intTime())
    elif count > 0:
        try:
            sum = unicode(
                checksum(open(os.path.join(mdir, file), "rb").read()))
        except:
            sum = u""
        deck.db.statement("""
insert into media (id, filename, refcnt, modified, chksum)
values (:id, :file, :c, :mod, :sum)""",
                          id=genID(), file=file, c=count, mod=intTime(),
                          sum=sum)

def removeUnusedMedia(deck):
    ids = deck.db.column0("select id from media where refcnt = 0")
    for id in ids:
        deck.db.statement("insert into mediaDeleted values (:id, :t)",
                          id=id, t=time.time())
    deck.db.statement("delete from media where refcnt = 0")

# String manipulation
##########################################################################

def mediaFiles(string, remote=False):
    l = []
    for reg in regexps:
        for (full, fname) in re.findall(reg, string):
            isLocal = not re.match("(https?|ftp)://", fname.lower())
            if not remote and isLocal:
                l.append(fname)
            elif remote and not isLocal:
                l.append(fname)
    return l

def stripMedia(txt):
    for reg in regexps:
        txt = re.sub(reg, "", txt)
    return txt

def escapeImages(string):
    def repl(match):
        tag = match.group(1)
        fname = match.group(2)
        if re.match("(https?|ftp)://", fname):
            return tag
        return tag.replace(
            fname, urllib.quote(fname.encode("utf-8")))
    return re.sub(regexps[1], repl, string)

# Rebuilding DB
##########################################################################

def rebuildMediaDir(deck, delete=False, dirty=True):
    mdir = deck.mediaDir()
    if not mdir:
        return (0, 0)
    deck.startProgress(title=_("Check Media DB"))
    # set all ref counts to 0
    deck.db.statement("update media set refcnt = 0")
    # look through cards for media references
    refs = {}
    normrefs = {}
    def norm(s):
        if isinstance(s, unicode):
            return unicodedata.normalize('NFD', s)
        return s
    for (question, answer) in deck.db.all(
        "select question, answer from cards"):
        for txt in (question, answer):
            for f in mediaFiles(txt):
                if f in refs:
                    refs[f] += 1
                else:
                    refs[f] = 1
                    normrefs[norm(f)] = True
    # update ref counts
    for (file, count) in refs.items():
        updateMediaCount(deck, file, count)
    # find unused media
    unused = []
    for file in os.listdir(mdir):
        path = os.path.join(mdir, file)
        if not os.path.isfile(path):
            # ignore directories
            continue
        nfile = norm(file)
        if nfile not in normrefs:
            unused.append(file)
    # optionally delete
    if delete:
        for f in unused:
            path = os.path.join(mdir, f)
            os.unlink(path)
    # remove entries in db for unused media
    removeUnusedMedia(deck)
    # check md5s are up to date
    update = []
    for (file, md5) in deck.db.all(
        "select filename, chksum from media"):
        path = os.path.join(mdir, file)
        if not os.path.exists(path):
            if md5:
                update.append({'f':file, 'sum':u"", 'c':intTime()})
        else:
            sum = unicode(
                checksum(open(os.path.join(mdir, file), "rb").read()))
            if md5 != sum:
                update.append({'f':file, 'sum':sum, 'c':intTime()})
    if update:
        deck.db.statements("""
update media set chksum = :sum, modified = :c where filename = :f""",
                           update)
    # update deck and get return info
    if dirty:
        deck.flushMod()
    nohave = deck.db.column0("select filename from media where chksum = ''")
    deck.finishProgress()
    return (nohave, unused)

# Download missing
##########################################################################

def downloadMissing(deck):
    urlbase = deck.getVar("mediaURL")
    if not urlbase:
        return None
    mdir = deck.mediaDir(create=True)
    deck.startProgress()
    missing = 0
    grabbed = 0
    for c, (f, sum) in enumerate(deck.db.all(
        "select filename, chksum from media")):
        path = os.path.join(mdir, f)
        if not os.path.exists(path):
            try:
                rpath = urlbase + f
                url = urllib2.urlopen(rpath)
                # write into the media dir, not the working directory
                open(path, "wb").write(url.read())
                grabbed += 1
            except:
                if sum:
                    # the file is supposed to exist
                    deck.finishProgress()
                    return (False, rpath)
                else:
                    # ignore and keep going
                    missing += 1
        deck.updateProgress(label=_("File %d...") % (grabbed+missing))
    deck.finishProgress()
    return (True, grabbed, missing)

# Convert remote links to local ones
##########################################################################

def downloadRemote(deck):
    mdir = deck.mediaDir(create=True)
    refs = {}
    deck.startProgress()
    for (question, answer) in deck.db.all(
        "select question, answer from cards"):
        for txt in (question, answer):
            for f in mediaFiles(txt, remote=True):
                refs[f] = True
    tmpdir = tempfile.mkdtemp(prefix="anki")
    failed = []
    passed = []
    for c, link in enumerate(refs.keys()):
        try:
            path = os.path.join(tmpdir, os.path.basename(link))
            url = urllib2.urlopen(link)
            open(path, "wb").write(url.read())
            newpath = copyToMedia(deck, path)
            passed.append([link, newpath])
        except:
            failed.append(link)
        deck.updateProgress(label=_("Download %d...") % c)
    # swap the remote links for the local filenames
    deck.updateProgress(label=_("Updating references..."))
    for (url, name) in passed:
        deck.db.statement(
            "update fields set value = replace(value, :url, :name)",
            url=url, name=name)
    deck.updateProgress(label=_("Updating cards..."))
    # rebuild entire q/a cache
    for m in deck.models:
        deck.updateCardsFromModel(m, dirty=True)
    deck.finishProgress()
    deck.flushMod()
    return (passed, failed)