Anki/anki/media.py
Damien Elmes 2f27133705 drop sqlalchemy; massive refactor
SQLAlchemy is a great tool, but it wasn't a great fit for Anki:
- We often had to drop down to raw SQL for performance reasons.
- The DB cursors and results were wrapped, which incurred a
  sizable performance hit due to introspection. Operations like fetching 50k
  records from a hot cache were taking more than twice as long to complete.
- We take advantage of sqlite-specific features, so SQL language abstraction
  is useless to us.
- The anki schema is quite small, so manually saving and loading objects is
  not a big burden.

In the process of porting to DBAPI, I've refactored the database schema:
- App configuration data that we don't need in joins or bulk updates has been
  moved into JSON objects. This simplifies serializing, and means we won't
  need DB schema changes to store extra options in the future. This change
  obsoletes the deckVars table.
- Renamed tables:
-- fieldModels -> fields
-- cardModels -> templates
-- fields -> fdata
- a number of attribute names have been shortened

Classes like Card, Fact & Model remain. They maintain a reference to the deck.
To write their state to the DB, call .flush().

Objects no longer have their modification time manually updated. Instead, the
modification time is updated when they are flushed. This also applies to the
deck.

Decks will now save on close, because various operations that were done at
deck load will be moved into deck close instead. Operations like undoing
buried card are cheap on a hot cache, but expensive on startup.
Programmatically you can call .close(save=False) to avoid a save and a
modification bump. This will be useful for generating due counts.

Because of the new saving behaviour, the save and save as options will be
removed from the GUI in the future.

The q/a cache and field cache generating has been centralized. Facts will
automatically rebuild the cache on flush; models can do so with
model.updateCache().

Media handling has also been reworked. It has moved into a MediaRegistry
object, which the deck holds. Refcounting has been dropped - it meant we had
to compare old and new value every time facts or models were changed, and
existed for the sole purpose of not showing errors on a missing media
download. Instead we just media.registerText(q+a) when it's updated. The
download function will be expanded to ask the user if they want to continue
after a certain number of files have failed to download, which should be an
adequate alternative. And we now add the file into the media DB when it's
copied to th emedia directory, not when the card is commited. This fixes
duplicates a user would get if they added the same media to a card twice
without adding the card.

The old DeckStorage object had its upgrade code split in a previous commit;
the opening and upgrading code has been merged back together, and put in a
separate storage.py file. The correct way to open a deck now is import anki; d
= anki.Deck(path).

deck.getCard() -> deck.sched.getCard()
same with answerCard
deck.getCard(id) returns a Card object now.

And the DB wrapper has had a few changes:
- sql statements are a more standard DBAPI:
 - statement() -> execute()
 - statements() -> executemany()
- called like execute(sql, 1, 2, 3) or execute(sql, a=1, b=2, c=3)
- column0 -> list
2011-04-28 09:23:53 +09:00

273 lines
9.5 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
import os, shutil, re, urllib2, time, tempfile, unicodedata, urllib
from anki.utils import checksum, genID, intTime
from anki.lang import _
class MediaRegistry(object):
# other code depends on this order, so don't reorder
regexps = ("(?i)(\[sound:([^]]+)\])",
"(?i)(<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>)")
def __init__(self, deck):
self.deck = deck
self.mediaPrefix = ""
self._mediaDir = None
self._updateMediaDir()
def mediaDir(self, create=False):
if self._mediaDir:
return self._mediaDir
elif create:
self._updateMediaDir(True)
return self._mediaDir
def _updateMediaDir(self, create=False):
if self.mediaPrefix:
dir = os.path.join(
self.mediaPrefix, os.path.basename(self.deck.path))
else:
dir = self.deck.path
dir = re.sub("(?i)\.(anki)$", ".media", dir)
if create == None:
# don't create, but return dir
return dir
if not os.path.exists(dir):
if not create:
return
# will raise error if we can't create
os.makedirs(dir)
# change to the current dir
os.chdir(dir)
self._mediaDir = dir
# Adding and registering media
##########################################################################
def addFile(self, path):
"""Copy PATH to MEDIADIR, and return new filename.
If a file with the same md5sum exists in the DB, return that.
If a file with the same name exists, return a unique name."""
# see if have duplicate contents
csum = self.mediaChecksum(path)
if not csum:
# file was unreadable or didn't exist
return None
file = self.deck.db.scalar(
"select file from media where csum = :cs",
cs=csum)
if not file:
base = os.path.basename(path)
mdir = self.mediaDir(create=True)
file = self.uniquePath(mdir, base)
shutil.copy2(path, file)
self.registerFile(base)
return os.path.basename(file)
def registerFile(self, file):
"Add a single file to the media database."
if self.mediaDir():
csum = self.mediaChecksum(os.path.join(self.mediaDir(), file))
else:
csum = ""
self.deck.db.execute(
"insert or replace into media values (?, ?, ?)",
file, intTime(), csum)
def registerText(self, string):
"Add all media in string to the media database."
for f in self.mediaFiles(string):
self.registerFile(f)
def removeUnusedMedia(deck):
ids = deck.s.list("select id from media where size = 0")
for id in ids:
deck.s.statement("insert into mediaDeleted values (:id, :t)",
id=id, t=time.time())
deck.s.statement("delete from media where size = 0")
# Moving media
##########################################################################
def renameMediaDir(self, oldPath):
"Copy oldPath to our current media dir. "
assert os.path.exists(oldPath)
newPath = self.mediaDir(create=None)
# copytree doesn't want the dir to exist
try:
shutil.copytree(oldPath, newPath)
except:
# FIXME: should really remove everything in old dir instead of
# giving up
pass
# Tools
##########################################################################
def mediaChecksum(self, path):
"Return checksum of PATH, or empty string."
try:
return checksum(open(path, "rb").read())
except:
return ""
def uniquePath(self, dir, base):
# remove any dangerous characters
base = re.sub(r"[][<>:/\\&]", "", base)
# find a unique name
(root, ext) = os.path.splitext(base)
def repl(match):
n = int(match.group(1))
return " (%d)" % (n+1)
while True:
path = os.path.join(dir, root + ext)
if not os.path.exists(path):
break
reg = " \((\d+)\)$"
if not re.search(reg, root):
root = root + " (1)"
else:
root = re.sub(reg, repl, root)
return path
# String manipulation
##########################################################################
def mediaFiles(self, string, includeRemote=False):
l = []
for reg in self.regexps:
for (full, fname) in re.findall(reg, string):
isLocal = not re.match("(https?|ftp)://", fname.lower())
if isLocal or includeRemote:
l.append(fname)
return l
def stripMedia(self, txt):
for reg in self.regexps:
txt = re.sub(reg, "", txt)
return txt
def escapeImages(self, string):
def repl(match):
tag = match.group(1)
fname = match.group(2)
if re.match("(https?|ftp)://", fname):
return tag
return tag.replace(
fname, urllib.quote(fname.encode("utf-8")))
return re.sub(self.regexps[1], repl, string)
# Rebuilding DB
##########################################################################
def rebuildMediaDir(self, delete=False):
mdir = self.mediaDir()
if not mdir:
return (0, 0)
self.deck.startProgress()
# delete all media entries in database
self.deck.db.execute("delete from media")
# look through cards for media references
normrefs = {}
def norm(s):
if isinstance(s, unicode):
return unicodedata.normalize('NFD', s)
return s
for (question, answer) in self.deck.db.all(
"select q, a from cards"):
for txt in (question, answer):
for f in self.mediaFiles(txt):
normrefs[norm(f)] = True
self.registerFile(f)
# find unused media
unused = []
for file in os.listdir(mdir):
path = os.path.join(mdir, file)
if not os.path.isfile(path):
# ignore directories
continue
nfile = norm(file)
if nfile not in normrefs:
unused.append(file)
# optionally delete
if delete:
for f in unused:
path = os.path.join(mdir, f)
os.unlink(path)
nohave = self.deck.db.list(
"select file from media where csum = ''")
self.deck.finishProgress()
return (nohave, unused)
# Download missing
##########################################################################
def downloadMissing(self):
urlbase = self.deck.getVar("mediaURL")
if not urlbase:
return None
mdir = self.deck.mediaDir(create=True)
self.deck.startProgress()
missing = 0
grabbed = 0
for c, (f, sum) in enumerate(self.deck.db.all(
"select file, csum from media")):
path = os.path.join(mdir, f)
if not os.path.exists(path):
try:
rpath = urlbase + f
url = urllib2.urlopen(rpath)
open(f, "wb").write(url.read())
grabbed += 1
except:
if sum:
# the file is supposed to exist
self.deck.finishProgress()
return (False, rpath)
else:
# ignore and keep going
missing += 1
self.deck.updateProgress(label=_("File %d...") % (grabbed+missing))
self.deck.finishProgress()
return (True, grabbed, missing)
# Convert remote links to local ones
##########################################################################
def downloadRemote(self):
mdir = self.deck.mediaDir(create=True)
refs = {}
self.deck.startProgress()
for (question, answer) in self.deck.db.all(
"select question, answer from cards"):
for txt in (question, answer):
for f in mediaFiles(txt, remote=True):
refs[f] = True
tmpdir = tempfile.mkdtemp(prefix="anki")
failed = []
passed = []
for c, link in enumerate(refs.keys()):
try:
path = os.path.join(tmpdir, os.path.basename(link))
url = urllib2.urlopen(link)
open(path, "wb").write(url.read())
newpath = copyToMedia(self.deck, path)
passed.append([link, newpath])
except:
failed.append(link)
self.deck.updateProgress(label=_("Download %d...") % c)
for (url, name) in passed:
self.deck.db.execute(
"update fields set value = replace(value, :url, :name)",
url=url, name=name)
self.deck.updateProgress(label=_("Updating references..."))
self.deck.updateProgress(label=_("Updating cards..."))
# rebuild entire q/a cache
for m in self.deck.models:
self.deck.updateCardsFromModel(m, dirty=True)
self.deck.finishProgress()
return (passed, failed)