Mirror of https://github.com/ankitects/anki.git, synced 2025-09-20 06:52:21 -04:00

Anki used random 64-bit ids for cards, facts and fields. This had some nice properties:

- merging data in syncs and imports was simply a matter of copying each way, as conflicts were astronomically unlikely
- it made it easy to identify identical cards and prevent them from being reimported

But there were some negatives too:

- they're more expensive to store
- javascript can't handle numbers > 2**53, which means AnkiMobile, iAnki and so on have to treat the ids as strings, which is slow
- simply copying data in a sync or import can lead to corruption: while a duplicate id indicates the data was originally the same, it may have diverged, so a more intelligent approach is necessary
- sqlite was sorting the fields table based on the id, which meant the fields were spread across the table and costly to fetch

So instead, we'll move to incremental ids. In the case of model changes we'll treat that as a schema change and force a full sync to avoid having to deal with conflicts; in the case of cards and facts, we'll need to update the ids on one end to merge. Identical cards can be detected by checking whether their id and their creation time are both the same.

Creation time has been added back to cards and facts because it's necessary for sync conflict merging. That means facts.pos is no longer required.

The graves table has been removed. It's not necessary for schema-related changes, and dead cards/facts can be represented as a card with queue=-4 and created=0. Because we will record the schema modification time and can ensure a full sync propagates to all endpoints, we can remove the dead cards/facts on schema change.

Tags have been removed from the facts table and are represented as a field with ord=-1 and fmid=0. Combined with the locality improvement for fields, this means fetching fields is not much more expensive than using the q/a cache.

Because of the above, removing the q/a cache is now a possibility, and the q and a columns on cards have been dropped. It will still be necessary to render the q/a on fact add/edit, since we need to record media references. It would be nice to avoid this in the future; perhaps one way would be the ability to assign a type to fields, like "image", "audio" or "latex". LaTeX needs special consideration anyway, as it was being rendered into the q/a cache.
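To make the merge and tombstone rules above concrete, here is a minimal sketch; row access by field name is an assumption for illustration, and only id, created and queue come from the message itself:

def same_origin(local, incoming):
    # an id match alone is not enough: the rows may have diverged
    # after creation, so the creation time must match too
    return (local["id"] == incoming["id"]
            and local["created"] == incoming["created"])

def is_dead(card):
    # dead cards/facts are ordinary rows rather than graves entries
    return card["queue"] == -4 and card["created"] == 0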
277 lines
9.7 KiB
Python
# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html

import os, shutil, re, urllib2, time, tempfile, unicodedata, urllib
from anki.utils import checksum, intTime
from anki.lang import _

class MediaRegistry(object):

    # other code depends on this order, so don't reorder
    regexps = ("(?i)(\[sound:([^]]+)\])",
               "(?i)(<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>)")

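    # each pattern captures (full match, filename), e.g.
    # re.findall(regexps[0], "[sound:hi.mp3]") == [("[sound:hi.mp3]", "hi.mp3")]
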
    def __init__(self, deck):
        self.deck = deck
        self.mediaPrefix = ""
        self._mediaDir = None
        self._updateMediaDir()

    def mediaDir(self, create=False):
        if self._mediaDir:
            return self._mediaDir
        elif create:
            self._updateMediaDir(True)
        return self._mediaDir

    def _updateMediaDir(self, create=False):
        if self.mediaPrefix:
            dir = os.path.join(
                self.mediaPrefix, os.path.basename(self.deck.path))
        else:
            dir = self.deck.path
        dir = re.sub("(?i)\.(anki)$", ".media", dir)
        if create is None:
            # don't create, but return dir
            return dir
        if not os.path.exists(dir):
            if not create:
                return
            # will raise error if we can't create
            os.makedirs(dir)
        # change to the current dir
        os.chdir(dir)
        self._mediaDir = dir

    # Adding and registering media
    ##########################################################################

    def addFile(self, path):
        """Copy PATH to MEDIADIR, and return the new filename.

        If a file with the same md5sum exists in the DB, return that.
        If a file with the same name exists, return a unique name."""
        # see if we have duplicate contents
        csum = self.mediaChecksum(path)
        if not csum:
            # file was unreadable or didn't exist
            return None
        file = self.deck.db.scalar(
            "select file from media where csum = :cs",
            cs=csum)
        if not file:
            base = os.path.basename(path)
            mdir = self.mediaDir(create=True)
            file = self.uniquePath(mdir, base)
            shutil.copy2(path, file)
            # register the name as written to disk, which uniquePath()
            # may have altered to avoid a clash
            self.registerFile(os.path.basename(file))
        return os.path.basename(file)

    def registerFile(self, file):
        "Add a single file to the media database."
        if self.mediaDir():
            csum = self.mediaChecksum(os.path.join(self.mediaDir(), file))
        else:
            csum = ""
        self.deck.db.execute(
            "insert or replace into media values (?, ?, ?)",
            file, intTime(), csum)

    def registerText(self, string):
        "Add all media in string to the media database."
        for f in self.mediaFiles(string):
            self.registerFile(f)

    def removeUnusedMedia(self):
        "Remove DB entries for files that could not be checksummed."
        # an empty csum marks a file that was missing or unreadable
        # when registered
        files = self.deck.db.list("select file from media where csum = ''")
        for file in files:
            self.deck.db.execute("insert into mediaDeleted values (?, ?)",
                                 file, intTime())
        self.deck.db.execute("delete from media where csum = ''")

    # Moving media
    ##########################################################################

    def renameMediaDir(self, oldPath):
        "Copy oldPath to our current media dir."
        assert os.path.exists(oldPath)
        newPath = self.mediaDir(create=None)
        # copytree doesn't want the dir to exist
        try:
            shutil.copytree(oldPath, newPath)
        except:
            # FIXME: should really remove everything in old dir instead of
            # giving up
            pass

    # Tools
    ##########################################################################

    def mediaChecksum(self, path):
        "Return checksum of PATH, or empty string."
        try:
            return checksum(open(path, "rb").read())
        except:
            return ""

    def uniquePath(self, dir, base):
        # remove any dangerous characters
        base = re.sub(r"[][<>:/\\&]", "", base)
        # find a unique name
        (root, ext) = os.path.splitext(base)
        def repl(match):
            n = int(match.group(1))
            return " (%d)" % (n+1)
        while True:
            path = os.path.join(dir, root + ext)
            if not os.path.exists(path):
                break
            reg = " \((\d+)\)$"
            if not re.search(reg, root):
                root = root + " (1)"
            else:
                root = re.sub(reg, repl, root)
        return path

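    # e.g. if "foo.jpg" and "foo (1).jpg" already exist, uniquePath()
    # walks "foo.jpg" -> "foo (1).jpg" -> "foo (2).jpg" and returns
    # the first name that is free
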
    # String manipulation
    ##########################################################################

    def mediaFiles(self, string, includeRemote=False):
        l = []
        for reg in self.regexps:
            for (full, fname) in re.findall(reg, string):
                isLocal = not re.match("(https?|ftp)://", fname.lower())
                if isLocal or includeRemote:
                    l.append(fname)
        return l

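    # e.g. mediaFiles('[sound:a.mp3] <img src="b.jpg">') returns
    # ["a.mp3", "b.jpg"]; remote links need includeRemote=True
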
    def stripMedia(self, txt):
        for reg in self.regexps:
            txt = re.sub(reg, "", txt)
        return txt

    def escapeImages(self, string):
        def repl(match):
            tag = match.group(1)
            fname = match.group(2)
            if re.match("(https?|ftp)://", fname):
                return tag
            return tag.replace(
                fname, urllib.quote(fname.encode("utf-8")))
        return re.sub(self.regexps[1], repl, string)

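    # e.g. <img src="my pic.png"> becomes <img src="my%20pic.png">;
    # remote http/https/ftp links are returned unchanged
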
    # Rebuilding DB
    ##########################################################################

    def rebuildMediaDir(self, delete=False):
        mdir = self.mediaDir()
        if not mdir:
            return ([], [])
        self.deck.startProgress()
        # delete all media entries in database
        self.deck.db.execute("delete from media")
        # look through cards for media references
        normrefs = {}
        def norm(s):
            if isinstance(s, unicode):
                return unicodedata.normalize('NFD', s)
            return s
        # generate q/a and look through all references
        (cids, fids, meta) = self.deck._cacheMeta()
        facts = self.deck._cacheFacts(fids)
        pend = [self.deck.formatQA(cids[n], facts[fids[n]], meta[cids[n]])
                for n in range(len(cids))]
        for p in pend:
            for type in ("q", "a"):
                for f in self.mediaFiles(p[type]):
                    normrefs[norm(f)] = True
                    self.registerFile(f)
        # find unused media
        unused = []
        for file in os.listdir(mdir):
            path = os.path.join(mdir, file)
            if not os.path.isfile(path):
                # ignore directories
                continue
            nfile = norm(file)
            if nfile not in normrefs:
                unused.append(file)
        # optionally delete
        if delete:
            for f in unused:
                path = os.path.join(mdir, f)
                os.unlink(path)
        nohave = self.deck.db.list(
            "select file from media where csum = ''")
        self.deck.finishProgress()
        return (nohave, unused)

    # Download missing
    ##########################################################################

    def downloadMissing(self):
        urlbase = self.deck.getVar("mediaURL")
        if not urlbase:
            return None
        mdir = self.mediaDir(create=True)
        self.deck.startProgress()
        missing = 0
        grabbed = 0
        for c, (f, sum) in enumerate(self.deck.db.all(
            "select file, csum from media")):
            path = os.path.join(mdir, f)
            if not os.path.exists(path):
                try:
                    rpath = urlbase + f
                    url = urllib2.urlopen(rpath)
                    # write to the full path, not just the bare filename
                    open(path, "wb").write(url.read())
                    grabbed += 1
                except:
                    if sum:
                        # the file is supposed to exist
                        self.deck.finishProgress()
                        return (False, rpath)
                    else:
                        # ignore and keep going
                        missing += 1
            self.deck.updateProgress(label=_("File %d...") % (grabbed+missing))
        self.deck.finishProgress()
        return (True, grabbed, missing)

    # Convert remote links to local ones
    ##########################################################################

    def downloadRemote(self):
        mdir = self.mediaDir(create=True)
        refs = {}
        self.deck.startProgress()
        for (question, answer) in self.deck.db.all(
            "select question, answer from cards"):
            for txt in (question, answer):
                for f in self.mediaFiles(txt, includeRemote=True):
                    refs[f] = True

        tmpdir = tempfile.mkdtemp(prefix="anki")
        failed = []
        passed = []
        for c, link in enumerate(refs.keys()):
            try:
                path = os.path.join(tmpdir, os.path.basename(link))
                url = urllib2.urlopen(link)
                open(path, "wb").write(url.read())
                # add the downloaded file to the media dir and record
                # its new name
                newpath = self.addFile(path)
                passed.append([link, newpath])
            except:
                failed.append(link)
            self.deck.updateProgress(label=_("Download %d...") % c)
        self.deck.updateProgress(label=_("Updating references..."))
        for (url, name) in passed:
            self.deck.db.execute(
                "update fields set value = replace(value, :url, :name)",
                url=url, name=name)
        self.deck.updateProgress(label=_("Updating cards..."))
        # rebuild entire q/a cache
        for m in self.deck.models:
            self.deck.updateCardsFromModel(m, dirty=True)
        self.deck.finishProgress()
        return (passed, failed)
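
For orientation, a minimal sketch of driving MediaRegistry with a stub deck object; the stub and its methods are assumptions for illustration (only the attributes the class touches above), not the real deck API:

class StubDB(object):
    # only the calls MediaRegistry makes above
    def scalar(self, sql, **kw): return None   # pretend no duplicate exists
    def execute(self, sql, *args, **kw): pass  # discard writes
    def list(self, sql, **kw): return []
    def all(self, sql, **kw): return []

class StubDeck(object):
    def __init__(self, path):
        self.path = path  # the ".media" dir name derives from this path
        self.db = StubDB()

deck = StubDeck("/tmp/test.anki")
mr = MediaRegistry(deck)
# assuming /tmp/picture.jpg exists, this copies it into /tmp/test.media,
# registers its checksum, and prints the stored filename
print mr.addFile("/tmp/picture.jpg")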