Anki/anki/media.py
2011-04-28 09:23:58 +09:00

234 lines
8.2 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import os, shutil, re, urllib, urllib2, time, tempfile, unicodedata, \
urllib, sys
from anki.utils import checksum, intTime
from anki.lang import _
class MediaRegistry(object):
# other code depends on this order, so don't reorder
regexps = ("(?i)(\[sound:([^]]+)\])",
"(?i)(<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>)")
def __init__(self, deck):
self.deck = deck
self.mediaPrefix = ""
self._dir = None
self._updateDir()
def dir(self, create=False):
if self._dir:
return self._dir
elif create:
self._updateDir(True)
return self._dir
def _updateDir(self, create=False):
if self.mediaPrefix:
dir = os.path.join(
self.mediaPrefix, os.path.basename(self.deck.path))
else:
dir = self.deck.path
dir = re.sub("(?i)\.(anki)$", ".media", dir)
if create == None:
# don't create, but return dir
return dir
if not os.path.exists(dir):
if not create:
return
# will raise error if we can't create
os.makedirs(dir)
# change to the current dir
os.chdir(dir)
self._dir = dir
# Adding media
##########################################################################
def addFile(self, opath):
"""Copy PATH to MEDIADIR, and return new filename.
If the same name exists, compare checksums."""
mdir = self.dir(create=True)
# remove any dangerous characters
base = re.sub(r"[][<>:/\\&]", "", os.path.basename(opath))
dst = os.path.join(mdir, base)
# if it doesn't exist, copy it directly
if not os.path.exists(dst):
shutil.copy2(opath, dst)
return base
# if it's identical, reuse
if self.filesIdentical(opath, dst):
return base
# otherwise, find a unique name
(root, ext) = os.path.splitext(base)
def repl(match):
n = int(match.group(1))
return " (%d)" % (n+1)
while True:
path = os.path.join(mdir, root + ext)
if not os.path.exists(path):
break
reg = " \((\d+)\)$"
if not re.search(reg, root):
root = root + " (1)"
else:
root = re.sub(reg, repl, root)
# copy and return
shutil.copy2(opath, path)
return os.path.basename(os.path.basename(path))
def filesIdentical(self, path1, path2):
"True if files are the same."
return (checksum(open(path1, "rb").read()) ==
checksum(open(path2, "rb").read()))
# String manipulation
##########################################################################
def mediaFiles(self, string, includeRemote=False):
l = []
for reg in self.regexps:
for (full, fname) in re.findall(reg, string):
isLocal = not re.match("(https?|ftp)://", fname.lower())
if isLocal or includeRemote:
l.append(fname)
return l
def strip(self, txt):
for reg in self.regexps:
txt = re.sub(reg, "", txt)
return txt
def escapeImages(self, string):
# Feeding webkit unicode can result in it not finding images, so on
# linux/osx we percent escape the image paths as utf8. On Windows the
# problem is more complicated - if we percent-escape as utf8 it fixes
# some images but breaks others. When filenames are normalized by
# dropbox they become unreadable if we escape them.
if sys.platform.startswith("win32"):
return string
def repl(match):
tag = match.group(1)
fname = match.group(2)
if re.match("(https?|ftp)://", fname):
return tag
return tag.replace(
fname, urllib.quote(fname.encode("utf-8")))
return re.sub(self.regexps[1], repl, string)
# Rebuilding DB
##########################################################################
def check(self, delete=False):
"Return (missingFiles, unusedFiles)."
mdir = self.dir()
if not mdir:
return (0, 0)
# generate card q/a and look through all references
normrefs = {}
def norm(s):
if isinstance(s, unicode):
return unicodedata.normalize('NFD', s)
return s
for f in self.allMedia():
normrefs[norm(f)] = True
# loop through directory and find unused & missing media
unused = []
for file in os.listdir(mdir):
if file.startswith("latex-"):
continue
path = os.path.join(mdir, file)
if not os.path.isfile(path):
# ignore directories
continue
nfile = norm(file)
if nfile not in normrefs:
unused.append(file)
else:
del normrefs[nfile]
# optionally delete
if delete:
for f in unused:
path = os.path.join(mdir, f)
os.unlink(path)
nohave = normrefs.keys()
return (nohave, unused)
def allMedia(self):
"Return a set of all referenced filenames."
files = set()
for p in self.deck.renderQA(type="all"):
for type in ("q", "a"):
for f in self.mediaFiles(p[type]):
files.add(f)
return files
# Download missing
##########################################################################
def downloadMissing(self):
raise Exception()
urlbase = self.deck.getVar("mediaURL")
if not urlbase:
return None
mdir = self.deck.dir(create=True)
missing = 0
grabbed = 0
for c, (f, sum) in enumerate(self.deck.db.all(
"select file, csum from media")):
path = os.path.join(mdir, f)
if not os.path.exists(path):
try:
rpath = urlbase + f
url = urllib2.urlopen(rpath)
open(f, "wb").write(url.read())
grabbed += 1
except:
if sum:
# the file is supposed to exist
return (False, rpath)
else:
# ignore and keep going
missing += 1
#self.deck.updateProgress(label=_("File %d...") % (grabbed+missing))
return (True, grabbed, missing)
# Convert remote links to local ones
##########################################################################
def downloadRemote(self):
raise Exception()
mdir = self.deck.dir(create=True)
refs = {}
for (question, answer) in self.deck.db.all(
"select question, answer from cards"):
for txt in (question, answer):
for f in mediaFiles(txt, remote=True):
refs[f] = True
tmpdir = tempfile.mkdtemp(prefix="anki")
failed = []
passed = []
for c, link in enumerate(refs.keys()):
try:
path = os.path.join(tmpdir, os.path.basename(link))
url = urllib2.urlopen(link)
open(path, "wb").write(url.read())
newpath = copyToMedia(self.deck, path)
passed.append([link, newpath])
except:
failed.append(link)
#self.deck.updateProgress(label=_("Download %d...") % c)
for (url, name) in passed:
self.deck.db.execute(
"update fields set value = replace(value, :url, :name)",
url=url, name=name)
#self.deck.updateProgress(label=_("Updating references..."))
#self.deck.updateProgress(label=_("Updating cards..."))
# rebuild entire q/a cache
for m in self.deck.models:
self.deck.updateCardsFromModel(m, dirty=True)
return (passed, failed)