mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 22:42:25 -04:00
remove the media table
The media table was originally introduced when Anki hashed media filenames, and needed a way to remember the original filename. It also helped with:

1) getting a quick list of all media used in the deck, or the media added since the last sync, for mobile clients
2) merging identical files with different names

But it had some drawbacks:

- every operation that modifies templates, models or facts meant generating the q/a and checking if any new media had appeared
- each entry is about 70 bytes, and some decks have 100k+ media files

So we remove the media table. We address 1) by being more intelligent about media downloads on the mobile platform. We ask the user after a full sync if they want to look for missing media, and they can choose not to if they know they haven't added any. And on a partial sync, we can scan the contents of the incoming facts for media references, and download any references we find. This also avoids all the issues people had with media not downloading because it was in their media folder but not in the media database.

For 2), when copying media to the media folder, if we have a duplicate filename, we check if that file has the same md5, and avoid copying if so. This won't merge identical content that has separate names, but instances where users need that are rare.
This commit is contained in:
parent
bd477de1a9
commit
be045d451c
6 changed files with 98 additions and 183 deletions
|
@ -450,7 +450,6 @@ select id from cards where fid in (select id from facts where mid = ?)""",
|
||||||
html = anki.template.render(format, fields)
|
html = anki.template.render(format, fields)
|
||||||
# if filters:
|
# if filters:
|
||||||
# d[type] = runFilter("renderQA.post", html, fields, meta, self)
|
# d[type] = runFilter("renderQA.post", html, fields, meta, self)
|
||||||
self.media.registerText(html)
|
|
||||||
d[type] = html
|
d[type] = html
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,7 @@ def buildImg(deck, latex):
|
||||||
texfile.write(latex)
|
texfile.write(latex)
|
||||||
texfile.close()
|
texfile.close()
|
||||||
# make sure we have a valid mediaDir
|
# make sure we have a valid mediaDir
|
||||||
mdir = deck.mediaDir(create=True)
|
mdir = deck.media.dir(create=True)
|
||||||
oldcwd = os.getcwd()
|
oldcwd = os.getcwd()
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
si = subprocess.STARTUPINFO()
|
si = subprocess.STARTUPINFO()
|
||||||
|
|
148
anki/media.py
148
anki/media.py
|
@ -15,17 +15,17 @@ class MediaRegistry(object):
|
||||||
def __init__(self, deck):
|
def __init__(self, deck):
|
||||||
self.deck = deck
|
self.deck = deck
|
||||||
self.mediaPrefix = ""
|
self.mediaPrefix = ""
|
||||||
self._mediaDir = None
|
self._dir = None
|
||||||
self._updateMediaDir()
|
self._updateDir()
|
||||||
|
|
||||||
def mediaDir(self, create=False):
|
def dir(self, create=False):
|
||||||
if self._mediaDir:
|
if self._dir:
|
||||||
return self._mediaDir
|
return self._dir
|
||||||
elif create:
|
elif create:
|
||||||
self._updateMediaDir(True)
|
self._updateDir(True)
|
||||||
return self._mediaDir
|
return self._dir
|
||||||
|
|
||||||
def _updateMediaDir(self, create=False):
|
def _updateDir(self, create=False):
|
||||||
if self.mediaPrefix:
|
if self.mediaPrefix:
|
||||||
dir = os.path.join(
|
dir = os.path.join(
|
||||||
self.mediaPrefix, os.path.basename(self.deck.path))
|
self.mediaPrefix, os.path.basename(self.deck.path))
|
||||||
|
@ -42,88 +42,32 @@ class MediaRegistry(object):
|
||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
# change to the current dir
|
# change to the current dir
|
||||||
os.chdir(dir)
|
os.chdir(dir)
|
||||||
self._mediaDir = dir
|
self._dir = dir
|
||||||
|
|
||||||
# Adding and registering media
|
# Adding media
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
def addFile(self, path):
|
def addFile(self, opath):
|
||||||
"""Copy PATH to MEDIADIR, and return new filename.
|
"""Copy PATH to MEDIADIR, and return new filename.
|
||||||
If a file with the same md5sum exists in the DB, return that.
|
If the same name exists, compare checksums."""
|
||||||
If a file with the same name exists, return a unique name."""
|
mdir = self.dir(create=True)
|
||||||
# see if have duplicate contents
|
|
||||||
csum = self.mediaChecksum(path)
|
|
||||||
if not csum:
|
|
||||||
# file was unreadable or didn't exist
|
|
||||||
return None
|
|
||||||
file = self.deck.db.scalar(
|
|
||||||
"select file from media where csum = :cs",
|
|
||||||
cs=csum)
|
|
||||||
if not file:
|
|
||||||
base = os.path.basename(path)
|
|
||||||
mdir = self.mediaDir(create=True)
|
|
||||||
file = self.uniquePath(mdir, base)
|
|
||||||
shutil.copy2(path, file)
|
|
||||||
self.registerFile(base)
|
|
||||||
return os.path.basename(file)
|
|
||||||
|
|
||||||
def registerFile(self, file):
|
|
||||||
"Add a single file to the media database."
|
|
||||||
if self.mediaDir():
|
|
||||||
csum = self.mediaChecksum(os.path.join(self.mediaDir(), file))
|
|
||||||
else:
|
|
||||||
csum = ""
|
|
||||||
self.deck.db.execute(
|
|
||||||
"insert or replace into media values (?, ?, ?)",
|
|
||||||
file, intTime(), csum)
|
|
||||||
|
|
||||||
def registerText(self, string):
|
|
||||||
"Add all media in string to the media database."
|
|
||||||
for f in self.mediaFiles(string):
|
|
||||||
self.registerFile(f)
|
|
||||||
|
|
||||||
def removeUnusedMedia(deck):
|
|
||||||
ids = deck.s.list("select id from media where size = 0")
|
|
||||||
for id in ids:
|
|
||||||
deck.s.statement("insert into mediaDeleted values (:id, :t)",
|
|
||||||
id=id, t=time.time())
|
|
||||||
deck.s.statement("delete from media where size = 0")
|
|
||||||
|
|
||||||
# Moving media
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
def renameMediaDir(self, oldPath):
|
|
||||||
"Copy oldPath to our current media dir. "
|
|
||||||
assert os.path.exists(oldPath)
|
|
||||||
newPath = self.mediaDir(create=None)
|
|
||||||
# copytree doesn't want the dir to exist
|
|
||||||
try:
|
|
||||||
shutil.copytree(oldPath, newPath)
|
|
||||||
except:
|
|
||||||
# FIXME: should really remove everything in old dir instead of
|
|
||||||
# giving up
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Tools
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
def mediaChecksum(self, path):
|
|
||||||
"Return checksum of PATH, or empty string."
|
|
||||||
try:
|
|
||||||
return checksum(open(path, "rb").read())
|
|
||||||
except:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def uniquePath(self, dir, base):
|
|
||||||
# remove any dangerous characters
|
# remove any dangerous characters
|
||||||
base = re.sub(r"[][<>:/\\&]", "", base)
|
base = re.sub(r"[][<>:/\\&]", "", os.path.basename(opath))
|
||||||
# find a unique name
|
dst = os.path.join(mdir, base)
|
||||||
|
# if it doesn't exist, copy it directly
|
||||||
|
if not os.path.exists(dst):
|
||||||
|
shutil.copy2(opath, dst)
|
||||||
|
return base
|
||||||
|
# if it's identical, reuse
|
||||||
|
if self.filesIdentical(opath, dst):
|
||||||
|
return base
|
||||||
|
# otherwise, find a unique name
|
||||||
(root, ext) = os.path.splitext(base)
|
(root, ext) = os.path.splitext(base)
|
||||||
def repl(match):
|
def repl(match):
|
||||||
n = int(match.group(1))
|
n = int(match.group(1))
|
||||||
return " (%d)" % (n+1)
|
return " (%d)" % (n+1)
|
||||||
while True:
|
while True:
|
||||||
path = os.path.join(dir, root + ext)
|
path = os.path.join(mdir, root + ext)
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
break
|
break
|
||||||
reg = " \((\d+)\)$"
|
reg = " \((\d+)\)$"
|
||||||
|
@ -131,7 +75,14 @@ If a file with the same name exists, return a unique name."""
|
||||||
root = root + " (1)"
|
root = root + " (1)"
|
||||||
else:
|
else:
|
||||||
root = re.sub(reg, repl, root)
|
root = re.sub(reg, repl, root)
|
||||||
return path
|
# copy and return
|
||||||
|
shutil.copy2(opath, path)
|
||||||
|
return os.path.basename(os.path.basename(path))
|
||||||
|
|
||||||
|
def filesIdentical(self, path1, path2):
|
||||||
|
"True if files are the same."
|
||||||
|
return (checksum(open(path1, "rb").read()) ==
|
||||||
|
checksum(open(path2, "rb").read()))
|
||||||
|
|
||||||
# String manipulation
|
# String manipulation
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
@ -163,25 +114,20 @@ If a file with the same name exists, return a unique name."""
|
||||||
# Rebuilding DB
|
# Rebuilding DB
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
def rebuildMediaDir(self, delete=False):
|
def check(self, delete=False):
|
||||||
mdir = self.mediaDir()
|
"Return (missingFiles, unusedFiles)."
|
||||||
|
mdir = self.dir()
|
||||||
if not mdir:
|
if not mdir:
|
||||||
return (0, 0)
|
return (0, 0)
|
||||||
# delete all media entries in database
|
# generate card q/a and look through all references
|
||||||
self.deck.db.execute("delete from media")
|
|
||||||
# look through cards for media references
|
|
||||||
normrefs = {}
|
normrefs = {}
|
||||||
def norm(s):
|
def norm(s):
|
||||||
if isinstance(s, unicode):
|
if isinstance(s, unicode):
|
||||||
return unicodedata.normalize('NFD', s)
|
return unicodedata.normalize('NFD', s)
|
||||||
return s
|
return s
|
||||||
# generate q/a and look through all references
|
for f in self.allMedia():
|
||||||
for p in self.deck.renderQA(type="all"):
|
|
||||||
for type in ("q", "a"):
|
|
||||||
for f in self.mediaFiles(p[type]):
|
|
||||||
normrefs[norm(f)] = True
|
normrefs[norm(f)] = True
|
||||||
self.registerFile(f)
|
# loop through directory and find unused & missing media
|
||||||
# find unused media
|
|
||||||
unused = []
|
unused = []
|
||||||
for file in os.listdir(mdir):
|
for file in os.listdir(mdir):
|
||||||
path = os.path.join(mdir, file)
|
path = os.path.join(mdir, file)
|
||||||
|
@ -191,15 +137,25 @@ If a file with the same name exists, return a unique name."""
|
||||||
nfile = norm(file)
|
nfile = norm(file)
|
||||||
if nfile not in normrefs:
|
if nfile not in normrefs:
|
||||||
unused.append(file)
|
unused.append(file)
|
||||||
|
else:
|
||||||
|
del normrefs[nfile]
|
||||||
# optionally delete
|
# optionally delete
|
||||||
if delete:
|
if delete:
|
||||||
for f in unused:
|
for f in unused:
|
||||||
path = os.path.join(mdir, f)
|
path = os.path.join(mdir, f)
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
nohave = self.deck.db.list(
|
nohave = normrefs.keys()
|
||||||
"select file from media where csum = ''")
|
|
||||||
return (nohave, unused)
|
return (nohave, unused)
|
||||||
|
|
||||||
|
def allMedia(self):
|
||||||
|
"Return a set of all referenced filenames."
|
||||||
|
files = set()
|
||||||
|
for p in self.deck.renderQA(type="all"):
|
||||||
|
for type in ("q", "a"):
|
||||||
|
for f in self.mediaFiles(p[type]):
|
||||||
|
files.add(f)
|
||||||
|
return files
|
||||||
|
|
||||||
# Download missing
|
# Download missing
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
@ -207,7 +163,7 @@ If a file with the same name exists, return a unique name."""
|
||||||
urlbase = self.deck.getVar("mediaURL")
|
urlbase = self.deck.getVar("mediaURL")
|
||||||
if not urlbase:
|
if not urlbase:
|
||||||
return None
|
return None
|
||||||
mdir = self.deck.mediaDir(create=True)
|
mdir = self.deck.dir(create=True)
|
||||||
missing = 0
|
missing = 0
|
||||||
grabbed = 0
|
grabbed = 0
|
||||||
for c, (f, sum) in enumerate(self.deck.db.all(
|
for c, (f, sum) in enumerate(self.deck.db.all(
|
||||||
|
@ -233,7 +189,7 @@ If a file with the same name exists, return a unique name."""
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
def downloadRemote(self):
|
def downloadRemote(self):
|
||||||
mdir = self.deck.mediaDir(create=True)
|
mdir = self.deck.dir(create=True)
|
||||||
refs = {}
|
refs = {}
|
||||||
for (question, answer) in self.deck.db.all(
|
for (question, answer) in self.deck.db.all(
|
||||||
"select question, answer from cards"):
|
"select question, answer from cards"):
|
||||||
|
|
|
@ -130,12 +130,6 @@ create table if not exists gconf (
|
||||||
conf text not null
|
conf text not null
|
||||||
);
|
);
|
||||||
|
|
||||||
create table if not exists media (
|
|
||||||
file text primary key,
|
|
||||||
mod integer not null,
|
|
||||||
csum text not null
|
|
||||||
);
|
|
||||||
|
|
||||||
create table if not exists revlog (
|
create table if not exists revlog (
|
||||||
time integer primary key,
|
time integer primary key,
|
||||||
cid integer not null,
|
cid integer not null,
|
||||||
|
@ -182,9 +176,7 @@ create index if not exists ix_facts_mod on facts (mod);
|
||||||
create index if not exists ix_cards_fid on cards (fid);
|
create index if not exists ix_cards_fid on cards (fid);
|
||||||
-- revlog by card
|
-- revlog by card
|
||||||
create index if not exists ix_revlog_cid on revlog (cid);
|
create index if not exists ix_revlog_cid on revlog (cid);
|
||||||
-- media
|
-- field uniqueness check
|
||||||
create index if not exists ix_media_csum on media (csum);
|
|
||||||
-- unique checking
|
|
||||||
create index if not exists ix_fsums_fid on fsums (fid);
|
create index if not exists ix_fsums_fid on fsums (fid);
|
||||||
create index if not exists ix_fsums_csum on fsums (csum);
|
create index if not exists ix_fsums_csum on fsums (csum);
|
||||||
""")
|
""")
|
||||||
|
@ -312,11 +304,7 @@ from facts order by created""")
|
||||||
|
|
||||||
# media
|
# media
|
||||||
###########
|
###########
|
||||||
_moveTable(db, "media")
|
db.execute("drop table media")
|
||||||
db.execute("""
|
|
||||||
insert or ignore into media select filename, cast(created as int),
|
|
||||||
originalPath from media2""")
|
|
||||||
db.execute("drop table media2")
|
|
||||||
|
|
||||||
# models
|
# models
|
||||||
###########
|
###########
|
||||||
|
|
Binary file not shown.
|
@ -5,85 +5,57 @@ from anki import Deck
|
||||||
from anki.utils import checksum
|
from anki.utils import checksum
|
||||||
from shared import getEmptyDeck, testDir
|
from shared import getEmptyDeck, testDir
|
||||||
|
|
||||||
# uniqueness check
|
|
||||||
def test_unique():
|
|
||||||
d = getEmptyDeck()
|
|
||||||
dir = tempfile.mkdtemp(prefix="anki")
|
|
||||||
# new file
|
|
||||||
n = "foo.jpg"
|
|
||||||
new = os.path.basename(d.media.uniquePath(dir, n))
|
|
||||||
assert new == n
|
|
||||||
# duplicate file
|
|
||||||
open(os.path.join(dir, n), "w").write("hello")
|
|
||||||
n = "foo.jpg"
|
|
||||||
new = os.path.basename(d.media.uniquePath(dir, n))
|
|
||||||
assert new == "foo (1).jpg"
|
|
||||||
# another duplicate
|
|
||||||
open(os.path.join(dir, "foo (1).jpg"), "w").write("hello")
|
|
||||||
n = "foo.jpg"
|
|
||||||
new = os.path.basename(d.media.uniquePath(dir, n))
|
|
||||||
assert new == "foo (2).jpg"
|
|
||||||
|
|
||||||
# copying files to media folder
|
# copying files to media folder
|
||||||
def test_copy():
|
def test_add():
|
||||||
d = getEmptyDeck()
|
d = getEmptyDeck()
|
||||||
dir = tempfile.mkdtemp(prefix="anki")
|
dir = tempfile.mkdtemp(prefix="anki")
|
||||||
path = os.path.join(dir, "foo.jpg")
|
path = os.path.join(dir, "foo.jpg")
|
||||||
open(path, "w").write("hello")
|
open(path, "w").write("hello")
|
||||||
# new file
|
# new file, should preserve name
|
||||||
assert d.media.addFile(path) == "foo.jpg"
|
assert d.media.addFile(path) == "foo.jpg"
|
||||||
# dupe md5
|
# adding the same file again should not create a duplicate
|
||||||
path = os.path.join(dir, "bar.jpg")
|
|
||||||
open(path, "w").write("hello")
|
|
||||||
assert d.media.addFile(path) == "foo.jpg"
|
assert d.media.addFile(path) == "foo.jpg"
|
||||||
|
# but if it has a different md5, it should
|
||||||
# media db
|
|
||||||
def test_db():
|
|
||||||
deck = getEmptyDeck()
|
|
||||||
dir = tempfile.mkdtemp(prefix="anki")
|
|
||||||
path = os.path.join(dir, "foo.jpg")
|
|
||||||
open(path, "w").write("hello")
|
|
||||||
# add a new fact that references it twice
|
|
||||||
f = deck.newFact()
|
|
||||||
f['Front'] = u"<img src='foo.jpg'>"
|
|
||||||
f['Back'] = u"back [sound:foo.jpg]"
|
|
||||||
deck.addFact(f)
|
|
||||||
# 1 entry in the media db, and no checksum
|
|
||||||
assert deck.db.scalar("select count() from media") == 1
|
|
||||||
assert not deck.db.scalar("select group_concat(csum, '') from media")
|
|
||||||
# copy to media folder
|
|
||||||
path = deck.media.addFile(path)
|
|
||||||
# md5 should be set now
|
|
||||||
assert deck.db.scalar("select count() from media") == 1
|
|
||||||
assert deck.db.scalar("select group_concat(csum, '') from media")
|
|
||||||
# detect file modifications
|
|
||||||
oldsum = deck.db.scalar("select csum from media")
|
|
||||||
open(path, "w").write("world")
|
open(path, "w").write("world")
|
||||||
deck.media.rebuildMediaDir()
|
assert d.media.addFile(path) == "foo (1).jpg"
|
||||||
newsum = deck.db.scalar("select csum from media")
|
|
||||||
assert newsum and newsum != oldsum
|
def test_strings():
|
||||||
# delete underlying file and check db
|
d = getEmptyDeck()
|
||||||
os.unlink(path)
|
mf = d.media.mediaFiles
|
||||||
deck.media.rebuildMediaDir()
|
assert mf("aoeu") == []
|
||||||
# md5 should be gone again
|
assert mf("aoeu<img src='foo.jpg'>ao") == ["foo.jpg"]
|
||||||
assert deck.db.scalar("select count() from media") == 1
|
assert mf("aoeu<img src=foo bar.jpg>ao") == ["foo bar.jpg"]
|
||||||
assert deck.db.scalar("select not csum from media")
|
assert mf("aoeu<img src=\"foo.jpg\">ao") == ["foo.jpg"]
|
||||||
# media db should pick up media defined via templates & bulk update
|
assert mf("aoeu<img src=\"foo.jpg\"><img class=yo src=fo>ao") == [
|
||||||
f['Back'] = u"bar.jpg"
|
"foo.jpg", "fo"]
|
||||||
f.flush()
|
assert mf("aou[sound:foo.mp3]aou") == ["foo.mp3"]
|
||||||
# modify template & regenerate
|
sp = d.media.stripMedia
|
||||||
assert deck.db.scalar("select count() from media") == 1
|
assert sp("aoeu") == "aoeu"
|
||||||
m = deck.currentModel()
|
assert sp("aoeu[sound:foo.mp3]aoeu") == "aoeuaoeu"
|
||||||
m.templates[0]['afmt']=u'<img src="{{{Back}}}">'
|
assert sp("a<img src=yo>oeu") == "aoeu"
|
||||||
m.flush()
|
es = d.media.escapeImages
|
||||||
deck.renderQA(type="all")
|
assert es("aoeu") == "aoeu"
|
||||||
assert deck.db.scalar("select count() from media") == 2
|
assert es("<img src='http://foo.com'>") == "<img src='http://foo.com'>"
|
||||||
|
assert es('<img src="foo bar.jpg">') == '<img src="foo%20bar.jpg">'
|
||||||
|
|
||||||
def test_deckIntegration():
|
def test_deckIntegration():
|
||||||
deck = getEmptyDeck()
|
d = getEmptyDeck()
|
||||||
# create a media dir
|
# create a media dir
|
||||||
deck.media.mediaDir(create=True)
|
d.media.dir(create=True)
|
||||||
# put a file into it
|
# put a file into it
|
||||||
file = unicode(os.path.join(testDir, "deck/fake.png"))
|
file = unicode(os.path.join(testDir, "support/fake.png"))
|
||||||
deck.media.addFile(file)
|
d.media.addFile(file)
|
||||||
print "todo: check media copied on rename"
|
# add a fact which references it
|
||||||
|
f = d.newFact()
|
||||||
|
f['Front'] = u"one"; f['Back'] = u"<img src='fake.png'>"
|
||||||
|
d.addFact(f)
|
||||||
|
# and one which references a non-existent file
|
||||||
|
f = d.newFact()
|
||||||
|
f['Front'] = u"one"; f['Back'] = u"<img src='fake2.png'>"
|
||||||
|
d.addFact(f)
|
||||||
|
# and add another file which isn't used
|
||||||
|
open(os.path.join(d.media.dir(), "foo.jpg"), "wb").write("test")
|
||||||
|
# check media
|
||||||
|
ret = d.media.check()
|
||||||
|
assert ret[0] == ["fake2.png"]
|
||||||
|
assert ret[1] == ["foo.jpg"]
|
||||||
|
|
Loading…
Reference in a new issue