mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 14:32:22 -04:00
remove the media table
The media table was originally introduced when Anki hashed media filenames and needed a way to remember the original filename. It also helped with:

1) getting a quick list of all media used in the deck, or the media added since the last sync, for mobile clients
2) merging identical files with different names

But it had some drawbacks:

- every operation that modifies templates, models or facts meant generating the q/a and checking if any new media had appeared
- each entry is about 70 bytes, and some decks have 100k+ media files

So we remove the media table.

We address 1) by being more intelligent about media downloads on the mobile platform. We ask the user after a full sync if they want to look for missing media, and they can choose not to if they know they haven't added any. And on a partial sync, we can scan the contents of the incoming facts for media references, and download any references we find. This also avoids all the issues people had with media not downloading because it was in their media folder but not in the media database.

For 2), when copying media to the media folder, if we have a duplicate filename, we check if that file has the same md5, and avoid copying if so. This won't merge identical content that has separate names, but instances where users need that are rare.
This commit is contained in:
parent
bd477de1a9
commit
be045d451c
6 changed files with 98 additions and 183 deletions
|
@ -450,7 +450,6 @@ select id from cards where fid in (select id from facts where mid = ?)""",
|
|||
html = anki.template.render(format, fields)
|
||||
# if filters:
|
||||
# d[type] = runFilter("renderQA.post", html, fields, meta, self)
|
||||
self.media.registerText(html)
|
||||
d[type] = html
|
||||
return d
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ def buildImg(deck, latex):
|
|||
texfile.write(latex)
|
||||
texfile.close()
|
||||
# make sure we have a valid mediaDir
|
||||
mdir = deck.mediaDir(create=True)
|
||||
mdir = deck.media.dir(create=True)
|
||||
oldcwd = os.getcwd()
|
||||
if sys.platform == "win32":
|
||||
si = subprocess.STARTUPINFO()
|
||||
|
|
150
anki/media.py
150
anki/media.py
|
@ -15,17 +15,17 @@ class MediaRegistry(object):
|
|||
def __init__(self, deck):
|
||||
self.deck = deck
|
||||
self.mediaPrefix = ""
|
||||
self._mediaDir = None
|
||||
self._updateMediaDir()
|
||||
self._dir = None
|
||||
self._updateDir()
|
||||
|
||||
def mediaDir(self, create=False):
|
||||
if self._mediaDir:
|
||||
return self._mediaDir
|
||||
def dir(self, create=False):
|
||||
if self._dir:
|
||||
return self._dir
|
||||
elif create:
|
||||
self._updateMediaDir(True)
|
||||
return self._mediaDir
|
||||
self._updateDir(True)
|
||||
return self._dir
|
||||
|
||||
def _updateMediaDir(self, create=False):
|
||||
def _updateDir(self, create=False):
|
||||
if self.mediaPrefix:
|
||||
dir = os.path.join(
|
||||
self.mediaPrefix, os.path.basename(self.deck.path))
|
||||
|
@ -42,88 +42,32 @@ class MediaRegistry(object):
|
|||
os.makedirs(dir)
|
||||
# change to the current dir
|
||||
os.chdir(dir)
|
||||
self._mediaDir = dir
|
||||
self._dir = dir
|
||||
|
||||
# Adding and registering media
|
||||
# Adding media
|
||||
##########################################################################
|
||||
|
||||
def addFile(self, path):
|
||||
def addFile(self, opath):
|
||||
"""Copy PATH to MEDIADIR, and return new filename.
|
||||
If a file with the same md5sum exists in the DB, return that.
|
||||
If a file with the same name exists, return a unique name."""
|
||||
# see if have duplicate contents
|
||||
csum = self.mediaChecksum(path)
|
||||
if not csum:
|
||||
# file was unreadable or didn't exist
|
||||
return None
|
||||
file = self.deck.db.scalar(
|
||||
"select file from media where csum = :cs",
|
||||
cs=csum)
|
||||
if not file:
|
||||
base = os.path.basename(path)
|
||||
mdir = self.mediaDir(create=True)
|
||||
file = self.uniquePath(mdir, base)
|
||||
shutil.copy2(path, file)
|
||||
self.registerFile(base)
|
||||
return os.path.basename(file)
|
||||
|
||||
def registerFile(self, file):
|
||||
"Add a single file to the media database."
|
||||
if self.mediaDir():
|
||||
csum = self.mediaChecksum(os.path.join(self.mediaDir(), file))
|
||||
else:
|
||||
csum = ""
|
||||
self.deck.db.execute(
|
||||
"insert or replace into media values (?, ?, ?)",
|
||||
file, intTime(), csum)
|
||||
|
||||
def registerText(self, string):
|
||||
"Add all media in string to the media database."
|
||||
for f in self.mediaFiles(string):
|
||||
self.registerFile(f)
|
||||
|
||||
def removeUnusedMedia(deck):
|
||||
ids = deck.s.list("select id from media where size = 0")
|
||||
for id in ids:
|
||||
deck.s.statement("insert into mediaDeleted values (:id, :t)",
|
||||
id=id, t=time.time())
|
||||
deck.s.statement("delete from media where size = 0")
|
||||
|
||||
# Moving media
|
||||
##########################################################################
|
||||
|
||||
def renameMediaDir(self, oldPath):
|
||||
"Copy oldPath to our current media dir. "
|
||||
assert os.path.exists(oldPath)
|
||||
newPath = self.mediaDir(create=None)
|
||||
# copytree doesn't want the dir to exist
|
||||
try:
|
||||
shutil.copytree(oldPath, newPath)
|
||||
except:
|
||||
# FIXME: should really remove everything in old dir instead of
|
||||
# giving up
|
||||
pass
|
||||
|
||||
# Tools
|
||||
##########################################################################
|
||||
|
||||
def mediaChecksum(self, path):
|
||||
"Return checksum of PATH, or empty string."
|
||||
try:
|
||||
return checksum(open(path, "rb").read())
|
||||
except:
|
||||
return ""
|
||||
|
||||
def uniquePath(self, dir, base):
|
||||
If the same name exists, compare checksums."""
|
||||
mdir = self.dir(create=True)
|
||||
# remove any dangerous characters
|
||||
base = re.sub(r"[][<>:/\\&]", "", base)
|
||||
# find a unique name
|
||||
base = re.sub(r"[][<>:/\\&]", "", os.path.basename(opath))
|
||||
dst = os.path.join(mdir, base)
|
||||
# if it doesn't exist, copy it directly
|
||||
if not os.path.exists(dst):
|
||||
shutil.copy2(opath, dst)
|
||||
return base
|
||||
# if it's identical, reuse
|
||||
if self.filesIdentical(opath, dst):
|
||||
return base
|
||||
# otherwise, find a unique name
|
||||
(root, ext) = os.path.splitext(base)
|
||||
def repl(match):
|
||||
n = int(match.group(1))
|
||||
return " (%d)" % (n+1)
|
||||
while True:
|
||||
path = os.path.join(dir, root + ext)
|
||||
path = os.path.join(mdir, root + ext)
|
||||
if not os.path.exists(path):
|
||||
break
|
||||
reg = " \((\d+)\)$"
|
||||
|
@ -131,7 +75,14 @@ If a file with the same name exists, return a unique name."""
|
|||
root = root + " (1)"
|
||||
else:
|
||||
root = re.sub(reg, repl, root)
|
||||
return path
|
||||
# copy and return
|
||||
shutil.copy2(opath, path)
|
||||
return os.path.basename(os.path.basename(path))
|
||||
|
||||
def filesIdentical(self, path1, path2):
|
||||
"True if files are the same."
|
||||
return (checksum(open(path1, "rb").read()) ==
|
||||
checksum(open(path2, "rb").read()))
|
||||
|
||||
# String manipulation
|
||||
##########################################################################
|
||||
|
@ -163,25 +114,20 @@ If a file with the same name exists, return a unique name."""
|
|||
# Rebuilding DB
|
||||
##########################################################################
|
||||
|
||||
def rebuildMediaDir(self, delete=False):
|
||||
mdir = self.mediaDir()
|
||||
def check(self, delete=False):
|
||||
"Return (missingFiles, unusedFiles)."
|
||||
mdir = self.dir()
|
||||
if not mdir:
|
||||
return (0, 0)
|
||||
# delete all media entries in database
|
||||
self.deck.db.execute("delete from media")
|
||||
# look through cards for media references
|
||||
# generate card q/a and look through all references
|
||||
normrefs = {}
|
||||
def norm(s):
|
||||
if isinstance(s, unicode):
|
||||
return unicodedata.normalize('NFD', s)
|
||||
return s
|
||||
# generate q/a and look through all references
|
||||
for p in self.deck.renderQA(type="all"):
|
||||
for type in ("q", "a"):
|
||||
for f in self.mediaFiles(p[type]):
|
||||
normrefs[norm(f)] = True
|
||||
self.registerFile(f)
|
||||
# find unused media
|
||||
for f in self.allMedia():
|
||||
normrefs[norm(f)] = True
|
||||
# loop through directory and find unused & missing media
|
||||
unused = []
|
||||
for file in os.listdir(mdir):
|
||||
path = os.path.join(mdir, file)
|
||||
|
@ -191,15 +137,25 @@ If a file with the same name exists, return a unique name."""
|
|||
nfile = norm(file)
|
||||
if nfile not in normrefs:
|
||||
unused.append(file)
|
||||
else:
|
||||
del normrefs[nfile]
|
||||
# optionally delete
|
||||
if delete:
|
||||
for f in unused:
|
||||
path = os.path.join(mdir, f)
|
||||
os.unlink(path)
|
||||
nohave = self.deck.db.list(
|
||||
"select file from media where csum = ''")
|
||||
nohave = normrefs.keys()
|
||||
return (nohave, unused)
|
||||
|
||||
def allMedia(self):
|
||||
"Return a set of all referenced filenames."
|
||||
files = set()
|
||||
for p in self.deck.renderQA(type="all"):
|
||||
for type in ("q", "a"):
|
||||
for f in self.mediaFiles(p[type]):
|
||||
files.add(f)
|
||||
return files
|
||||
|
||||
# Download missing
|
||||
##########################################################################
|
||||
|
||||
|
@ -207,7 +163,7 @@ If a file with the same name exists, return a unique name."""
|
|||
urlbase = self.deck.getVar("mediaURL")
|
||||
if not urlbase:
|
||||
return None
|
||||
mdir = self.deck.mediaDir(create=True)
|
||||
mdir = self.deck.dir(create=True)
|
||||
missing = 0
|
||||
grabbed = 0
|
||||
for c, (f, sum) in enumerate(self.deck.db.all(
|
||||
|
@ -233,7 +189,7 @@ If a file with the same name exists, return a unique name."""
|
|||
##########################################################################
|
||||
|
||||
def downloadRemote(self):
|
||||
mdir = self.deck.mediaDir(create=True)
|
||||
mdir = self.deck.dir(create=True)
|
||||
refs = {}
|
||||
for (question, answer) in self.deck.db.all(
|
||||
"select question, answer from cards"):
|
||||
|
|
|
@ -130,12 +130,6 @@ create table if not exists gconf (
|
|||
conf text not null
|
||||
);
|
||||
|
||||
create table if not exists media (
|
||||
file text primary key,
|
||||
mod integer not null,
|
||||
csum text not null
|
||||
);
|
||||
|
||||
create table if not exists revlog (
|
||||
time integer primary key,
|
||||
cid integer not null,
|
||||
|
@ -182,9 +176,7 @@ create index if not exists ix_facts_mod on facts (mod);
|
|||
create index if not exists ix_cards_fid on cards (fid);
|
||||
-- revlog by card
|
||||
create index if not exists ix_revlog_cid on revlog (cid);
|
||||
-- media
|
||||
create index if not exists ix_media_csum on media (csum);
|
||||
-- unique checking
|
||||
-- field uniqueness check
|
||||
create index if not exists ix_fsums_fid on fsums (fid);
|
||||
create index if not exists ix_fsums_csum on fsums (csum);
|
||||
""")
|
||||
|
@ -312,11 +304,7 @@ from facts order by created""")
|
|||
|
||||
# media
|
||||
###########
|
||||
_moveTable(db, "media")
|
||||
db.execute("""
|
||||
insert or ignore into media select filename, cast(created as int),
|
||||
originalPath from media2""")
|
||||
db.execute("drop table media2")
|
||||
db.execute("drop table media")
|
||||
|
||||
# models
|
||||
###########
|
||||
|
|
Binary file not shown.
|
@ -5,85 +5,57 @@ from anki import Deck
|
|||
from anki.utils import checksum
|
||||
from shared import getEmptyDeck, testDir
|
||||
|
||||
# uniqueness check
|
||||
def test_unique():
|
||||
d = getEmptyDeck()
|
||||
dir = tempfile.mkdtemp(prefix="anki")
|
||||
# new file
|
||||
n = "foo.jpg"
|
||||
new = os.path.basename(d.media.uniquePath(dir, n))
|
||||
assert new == n
|
||||
# duplicate file
|
||||
open(os.path.join(dir, n), "w").write("hello")
|
||||
n = "foo.jpg"
|
||||
new = os.path.basename(d.media.uniquePath(dir, n))
|
||||
assert new == "foo (1).jpg"
|
||||
# another duplicate
|
||||
open(os.path.join(dir, "foo (1).jpg"), "w").write("hello")
|
||||
n = "foo.jpg"
|
||||
new = os.path.basename(d.media.uniquePath(dir, n))
|
||||
assert new == "foo (2).jpg"
|
||||
|
||||
# copying files to media folder
|
||||
def test_copy():
|
||||
def test_add():
|
||||
d = getEmptyDeck()
|
||||
dir = tempfile.mkdtemp(prefix="anki")
|
||||
path = os.path.join(dir, "foo.jpg")
|
||||
open(path, "w").write("hello")
|
||||
# new file
|
||||
# new file, should preserve name
|
||||
assert d.media.addFile(path) == "foo.jpg"
|
||||
# dupe md5
|
||||
path = os.path.join(dir, "bar.jpg")
|
||||
open(path, "w").write("hello")
|
||||
# adding the same file again should not create a duplicate
|
||||
assert d.media.addFile(path) == "foo.jpg"
|
||||
|
||||
# media db
|
||||
def test_db():
|
||||
deck = getEmptyDeck()
|
||||
dir = tempfile.mkdtemp(prefix="anki")
|
||||
path = os.path.join(dir, "foo.jpg")
|
||||
open(path, "w").write("hello")
|
||||
# add a new fact that references it twice
|
||||
f = deck.newFact()
|
||||
f['Front'] = u"<img src='foo.jpg'>"
|
||||
f['Back'] = u"back [sound:foo.jpg]"
|
||||
deck.addFact(f)
|
||||
# 1 entry in the media db, and no checksum
|
||||
assert deck.db.scalar("select count() from media") == 1
|
||||
assert not deck.db.scalar("select group_concat(csum, '') from media")
|
||||
# copy to media folder
|
||||
path = deck.media.addFile(path)
|
||||
# md5 should be set now
|
||||
assert deck.db.scalar("select count() from media") == 1
|
||||
assert deck.db.scalar("select group_concat(csum, '') from media")
|
||||
# detect file modifications
|
||||
oldsum = deck.db.scalar("select csum from media")
|
||||
# but if it has a different md5, it should
|
||||
open(path, "w").write("world")
|
||||
deck.media.rebuildMediaDir()
|
||||
newsum = deck.db.scalar("select csum from media")
|
||||
assert newsum and newsum != oldsum
|
||||
# delete underlying file and check db
|
||||
os.unlink(path)
|
||||
deck.media.rebuildMediaDir()
|
||||
# md5 should be gone again
|
||||
assert deck.db.scalar("select count() from media") == 1
|
||||
assert deck.db.scalar("select not csum from media")
|
||||
# media db should pick up media defined via templates & bulk update
|
||||
f['Back'] = u"bar.jpg"
|
||||
f.flush()
|
||||
# modify template & regenerate
|
||||
assert deck.db.scalar("select count() from media") == 1
|
||||
m = deck.currentModel()
|
||||
m.templates[0]['afmt']=u'<img src="{{{Back}}}">'
|
||||
m.flush()
|
||||
deck.renderQA(type="all")
|
||||
assert deck.db.scalar("select count() from media") == 2
|
||||
assert d.media.addFile(path) == "foo (1).jpg"
|
||||
|
||||
def test_strings():
|
||||
d = getEmptyDeck()
|
||||
mf = d.media.mediaFiles
|
||||
assert mf("aoeu") == []
|
||||
assert mf("aoeu<img src='foo.jpg'>ao") == ["foo.jpg"]
|
||||
assert mf("aoeu<img src=foo bar.jpg>ao") == ["foo bar.jpg"]
|
||||
assert mf("aoeu<img src=\"foo.jpg\">ao") == ["foo.jpg"]
|
||||
assert mf("aoeu<img src=\"foo.jpg\"><img class=yo src=fo>ao") == [
|
||||
"foo.jpg", "fo"]
|
||||
assert mf("aou[sound:foo.mp3]aou") == ["foo.mp3"]
|
||||
sp = d.media.stripMedia
|
||||
assert sp("aoeu") == "aoeu"
|
||||
assert sp("aoeu[sound:foo.mp3]aoeu") == "aoeuaoeu"
|
||||
assert sp("a<img src=yo>oeu") == "aoeu"
|
||||
es = d.media.escapeImages
|
||||
assert es("aoeu") == "aoeu"
|
||||
assert es("<img src='http://foo.com'>") == "<img src='http://foo.com'>"
|
||||
assert es('<img src="foo bar.jpg">') == '<img src="foo%20bar.jpg">'
|
||||
|
||||
def test_deckIntegration():
|
||||
deck = getEmptyDeck()
|
||||
d = getEmptyDeck()
|
||||
# create a media dir
|
||||
deck.media.mediaDir(create=True)
|
||||
d.media.dir(create=True)
|
||||
# put a file into it
|
||||
file = unicode(os.path.join(testDir, "deck/fake.png"))
|
||||
deck.media.addFile(file)
|
||||
print "todo: check media copied on rename"
|
||||
file = unicode(os.path.join(testDir, "support/fake.png"))
|
||||
d.media.addFile(file)
|
||||
# add a fact which references it
|
||||
f = d.newFact()
|
||||
f['Front'] = u"one"; f['Back'] = u"<img src='fake.png'>"
|
||||
d.addFact(f)
|
||||
# and one which references a non-existent file
|
||||
f = d.newFact()
|
||||
f['Front'] = u"one"; f['Back'] = u"<img src='fake2.png'>"
|
||||
d.addFact(f)
|
||||
# and add another file which isn't used
|
||||
open(os.path.join(d.media.dir(), "foo.jpg"), "wb").write("test")
|
||||
# check media
|
||||
ret = d.media.check()
|
||||
assert ret[0] == ["fake2.png"]
|
||||
assert ret[1] == ["foo.jpg"]
|
||||
|
|
Loading…
Reference in a new issue