# Anki/anki/media.py (273 lines, 9.5 KiB, Python)

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import os, shutil, re, urllib2, time, tempfile, unicodedata, urllib
from anki.utils import checksum, intTime
from anki.lang import _
class MediaRegistry(object):
    """Manage the media directory and the `media` table of a deck.

    The registry locates (and optionally creates) the deck's media
    directory, copies files into it, records them in the deck database,
    and extracts or rewrites media references found in card text.
    """

    # other code depends on this order, so don't reorder
    # [0] matches [sound:...] tags, [1] matches <img src=...> tags; in both,
    # group 1 is the whole tag and group 2 is the referenced filename.
    regexps = (r"(?i)(\[sound:([^]]+)\])",
               "(?i)(<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>)")

    def __init__(self, deck):
        """Bind the registry to DECK and pick up its media dir if present."""
        self.deck = deck
        self.mediaPrefix = ""
        self._mediaDir = None
        self._updateMediaDir()

    def mediaDir(self, create=False):
        """Return the media directory, or None if there isn't one.

        If CREATE is true and no directory is known yet, create it first.
        """
        if self._mediaDir:
            return self._mediaDir
        elif create:
            self._updateMediaDir(True)
        return self._mediaDir

    def _updateMediaDir(self, create=False):
        """Work out the media directory for the deck's current path.

        The directory name is the deck path with a trailing ".anki"
        replaced by ".media"; when mediaPrefix is set, only the deck's
        basename is used, placed under that prefix.

        CREATE is tri-state:
          None  -- just return the computed path; touch nothing.
          False -- adopt the directory only if it already exists.
          True  -- create the directory if needed (may raise OSError).

        When the directory is adopted, the process changes its working
        directory to it and remembers it in self._mediaDir.
        """
        if self.mediaPrefix:
            path = os.path.join(
                self.mediaPrefix, os.path.basename(self.deck.path))
        else:
            path = self.deck.path
        path = re.sub(r"(?i)\.(anki)$", ".media", path)
        if create is None:
            # don't create, but return dir
            return path
        if not os.path.exists(path):
            if not create:
                return
            # will raise error if we can't create
            os.makedirs(path)
        # change to the current dir
        os.chdir(path)
        self._mediaDir = path

    # Adding and registering media
    ##########################################################################

    def addFile(self, path):
        """Copy PATH to MEDIADIR, and return new filename.

        If a file with the same checksum exists in the DB, return that.
        If a file with the same name exists, return a unique name.
        Returns None if PATH could not be read.
        """
        # creating the media dir changes the working directory, so pin a
        # relative path down before that can happen
        path = os.path.abspath(path)
        # see if have duplicate contents
        csum = self.mediaChecksum(path)
        if not csum:
            # file was unreadable or didn't exist
            return None
        existing = self.deck.db.scalar(
            "select file from media where csum = :cs",
            cs=csum)
        if existing:
            return os.path.basename(existing)
        mdir = self.mediaDir(create=True)
        dest = self.uniquePath(mdir, os.path.basename(path))
        shutil.copy2(path, dest)
        # register the name actually written: uniquePath may have stripped
        # characters or appended a counter
        name = os.path.basename(dest)
        self.registerFile(name)
        return name

    def registerFile(self, file):
        """Add a single file to the media database.

        The checksum is stored as an empty string when there is no media
        directory or the file cannot be read.
        """
        mdir = self.mediaDir()
        if mdir:
            csum = self.mediaChecksum(os.path.join(mdir, file))
        else:
            csum = ""
        self.deck.db.execute(
            "insert or replace into media values (?, ?, ?)",
            file, intTime(), csum)

    def registerText(self, string):
        "Add all media in string to the media database."
        for fname in self.mediaFiles(string):
            self.registerFile(fname)

    def removeUnusedMedia(deck):
        # NOTE(review): this takes `deck` rather than `self` and talks to
        # `deck.s`, a `size` column and a `mediaDeleted` table, none of which
        # the rest of this class uses -- it looks like a leftover from an
        # earlier module-level API. Left as is; confirm whether it is still
        # called before changing or removing it.
        ids = deck.s.list("select id from media where size = 0")
        for mediaId in ids:
            deck.s.statement("insert into mediaDeleted values (:id, :t)",
                             id=mediaId, t=time.time())
        deck.s.statement("delete from media where size = 0")

    # Moving media
    ##########################################################################

    def renameMediaDir(self, oldPath):
        """Copy oldPath to the media dir that matches the deck's current path.

        Best effort: if the copy fails (for example because the target
        already exists), the failure is ignored.
        """
        assert os.path.exists(oldPath)
        # ask for the computed path directly; mediaDir(create=None) would
        # only ever hand back an existing directory or None, and copytree
        # can use neither
        newPath = self._updateMediaDir(create=None)
        # copytree doesn't want the dir to exist
        try:
            shutil.copytree(oldPath, newPath)
        except (EnvironmentError, shutil.Error):
            # FIXME: should really remove everything in old dir instead of
            # giving up
            pass

    # Tools
    ##########################################################################

    def mediaChecksum(self, path):
        "Return checksum of PATH, or empty string."
        try:
            with open(path, "rb") as f:
                return checksum(f.read())
        except Exception:
            # deliberately broad: any failure to read means "no checksum",
            # but KeyboardInterrupt/SystemExit still propagate
            return ""

    def uniquePath(self, dir, base):
        """Return a path inside DIR, derived from BASE, that does not exist.

        Characters in "[]<>:/\\&" are removed from BASE. If the name is
        taken, " (1)" is appended to the stem, and an existing trailing
        " (n)" is bumped to " (n+1)", until a free name is found.
        """
        # remove any dangerous characters
        base = re.sub(r"[][<>:/\\&]", "", base)
        (root, ext) = os.path.splitext(base)
        counter = re.compile(r" \((\d+)\)$")

        def bump(match):
            return " (%d)" % (int(match.group(1)) + 1)

        # find a unique name
        while True:
            path = os.path.join(dir, root + ext)
            if not os.path.exists(path):
                return path
            if counter.search(root):
                root = counter.sub(bump, root)
            else:
                root = root + " (1)"

    def _isRemote(self, fname):
        "True if FNAME is an http, https or ftp URL (case-insensitive)."
        return bool(re.match("(https?|ftp)://", fname.lower()))

    def _fetchTo(self, url, dest):
        """Download URL and write its content to DEST.

        The content is read completely before DEST is opened, so a failed
        download does not leave an empty file behind.
        """
        conn = urllib2.urlopen(url)
        try:
            data = conn.read()
        finally:
            conn.close()
        with open(dest, "wb") as out:
            out.write(data)

    # String manipulation
    ##########################################################################

    def mediaFiles(self, string, includeRemote=False):
        """Return the filenames referenced by sound and image tags in STRING.

        Remote (http/https/ftp) references are left out unless
        INCLUDEREMOTE is true. Sound references come first, then images.
        """
        found = []
        for reg in self.regexps:
            for (_tag, fname) in re.findall(reg, string):
                if includeRemote or not self._isRemote(fname):
                    found.append(fname)
        return found

    def stripMedia(self, txt):
        "Return TXT with all sound and image tags removed."
        for reg in self.regexps:
            txt = re.sub(reg, "", txt)
        return txt

    def escapeImages(self, string):
        """Return STRING with local image filenames URL-quoted.

        Tags pointing at remote URLs are returned unchanged.
        """
        def repl(match):
            tag = match.group(1)
            fname = match.group(2)
            if self._isRemote(fname):
                return tag
            quoted = urllib.quote(fname.encode("utf-8"))
            # splice at the matched position so that other occurrences of
            # the same text inside the tag are left alone
            start = match.start(2) - match.start(1)
            end = match.end(2) - match.start(1)
            return tag[:start] + quoted + tag[end:]
        return re.sub(self.regexps[1], repl, string)

    # Rebuilding DB
    ##########################################################################

    def rebuildMediaDir(self, delete=False):
        """Rebuild the media table from the media referenced by the cards.

        Returns (nohave, unused): the registered files that have no
        checksum, and the files in the media dir that no card references.
        If DELETE is true the unused files are removed from disk.
        Returns (0, 0) when there is no media directory.
        """
        mdir = self.mediaDir()
        if not mdir:
            return (0, 0)

        def norm(s):
            # compare names in one Unicode normal form; presumably because
            # some filesystems return decomposed names -- TODO confirm
            if isinstance(s, unicode):
                return unicodedata.normalize('NFD', s)
            return s

        self.deck.startProgress()
        try:
            # delete all media entries in database
            self.deck.db.execute("delete from media")
            # generate q/a and look through all references
            normrefs = set()
            registered = set()
            for p in self.deck.renderQA(type="all"):
                for side in ("q", "a"):
                    for f in self.mediaFiles(p[side]):
                        normrefs.add(norm(f))
                        # registering reads and checksums the file, so do
                        # it once per distinct name
                        if f not in registered:
                            registered.add(f)
                            self.registerFile(f)
            # find unused media
            unused = []
            for name in os.listdir(mdir):
                if not os.path.isfile(os.path.join(mdir, name)):
                    # ignore directories
                    continue
                if norm(name) not in normrefs:
                    unused.append(name)
            # optionally delete
            if delete:
                for name in unused:
                    os.unlink(os.path.join(mdir, name))
            nohave = self.deck.db.list(
                "select file from media where csum = ''")
        finally:
            self.deck.finishProgress()
        return (nohave, unused)

    # Download missing
    ##########################################################################

    def downloadMissing(self):
        """Download registered files that are absent from the media dir.

        Files are fetched from the deck's "mediaURL" variable plus the
        filename. Returns None if no URL is set, (False, url) as soon as a
        file that has a checksum cannot be fetched, and otherwise
        (True, grabbed, missing).
        """
        urlbase = self.deck.getVar("mediaURL")
        if not urlbase:
            return None
        mdir = self.mediaDir(create=True)
        missing = 0
        grabbed = 0
        self.deck.startProgress()
        try:
            for (fname, csum) in self.deck.db.all(
                    "select file, csum from media"):
                path = os.path.join(mdir, fname)
                if not os.path.exists(path):
                    rpath = urlbase + fname
                    try:
                        self._fetchTo(rpath, path)
                        grabbed += 1
                    except Exception:
                        if csum:
                            # the file is supposed to exist
                            return (False, rpath)
                        # ignore and keep going
                        missing += 1
                self.deck.updateProgress(
                    label=_("File %d...") % (grabbed+missing))
        finally:
            self.deck.finishProgress()
        return (True, grabbed, missing)

    # Convert remote links to local ones
    ##########################################################################

    def downloadRemote(self):
        """Download remotely referenced media and point the fields at it.

        Returns (passed, failed): a list of [url, local name] pairs, and a
        list of the urls that could not be fetched or added.
        """
        self.mediaDir(create=True)
        passed = []
        failed = []
        tmpdir = None
        self.deck.startProgress()
        try:
            # collect each remote link once, in order of appearance
            links = []
            seen = set()
            for (question, answer) in self.deck.db.all(
                    "select question, answer from cards"):
                for txt in (question, answer):
                    for f in self.mediaFiles(txt, includeRemote=True):
                        if self._isRemote(f) and f not in seen:
                            seen.add(f)
                            links.append(f)
            tmpdir = tempfile.mkdtemp(prefix="anki")
            for c, link in enumerate(links):
                try:
                    path = os.path.join(tmpdir, os.path.basename(link))
                    self._fetchTo(link, path)
                    newpath = self.addFile(path)
                except Exception:
                    newpath = None
                if newpath:
                    passed.append([link, newpath])
                else:
                    failed.append(link)
                self.deck.updateProgress(label=_("Download %d...") % c)
            for (url, name) in passed:
                self.deck.db.execute(
                    "update fields set value = replace(value, :url, :name)",
                    url=url, name=name)
                self.deck.updateProgress(label=_("Updating references..."))
            self.deck.updateProgress(label=_("Updating cards..."))
            # rebuild entire q/a cache
            for m in self.deck.models:
                self.deck.updateCardsFromModel(m, dirty=True)
        finally:
            if tmpdir:
                # the downloads have been copied into the media dir
                shutil.rmtree(tmpdir, ignore_errors=True)
            self.deck.finishProgress()
        return (passed, failed)