diff --git a/anki/deck.py b/anki/deck.py index 1e22ec18e..444f88115 100644 --- a/anki/deck.py +++ b/anki/deck.py @@ -450,7 +450,6 @@ select id from cards where fid in (select id from facts where mid = ?)""", html = anki.template.render(format, fields) # if filters: # d[type] = runFilter("renderQA.post", html, fields, meta, self) - self.media.registerText(html) d[type] = html return d diff --git a/anki/latex.py b/anki/latex.py index dabbadfb0..97a847593 100644 --- a/anki/latex.py +++ b/anki/latex.py @@ -69,7 +69,7 @@ def buildImg(deck, latex): texfile.write(latex) texfile.close() # make sure we have a valid mediaDir - mdir = deck.mediaDir(create=True) + mdir = deck.media.dir(create=True) oldcwd = os.getcwd() if sys.platform == "win32": si = subprocess.STARTUPINFO() diff --git a/anki/media.py b/anki/media.py index 0c49f213f..740ec6b00 100644 --- a/anki/media.py +++ b/anki/media.py @@ -15,17 +15,17 @@ class MediaRegistry(object): def __init__(self, deck): self.deck = deck self.mediaPrefix = "" - self._mediaDir = None - self._updateMediaDir() + self._dir = None + self._updateDir() - def mediaDir(self, create=False): - if self._mediaDir: - return self._mediaDir + def dir(self, create=False): + if self._dir: + return self._dir elif create: - self._updateMediaDir(True) - return self._mediaDir + self._updateDir(True) + return self._dir - def _updateMediaDir(self, create=False): + def _updateDir(self, create=False): if self.mediaPrefix: dir = os.path.join( self.mediaPrefix, os.path.basename(self.deck.path)) @@ -42,88 +42,32 @@ class MediaRegistry(object): os.makedirs(dir) # change to the current dir os.chdir(dir) - self._mediaDir = dir + self._dir = dir - # Adding and registering media + # Adding media ########################################################################## - def addFile(self, path): + def addFile(self, opath): """Copy PATH to MEDIADIR, and return new filename. -If a file with the same md5sum exists in the DB, return that. -If a file with the same name exists, return a unique name.""" - # see if have duplicate contents - csum = self.mediaChecksum(path) - if not csum: - # file was unreadable or didn't exist - return None - file = self.deck.db.scalar( - "select file from media where csum = :cs", - cs=csum) - if not file: - base = os.path.basename(path) - mdir = self.mediaDir(create=True) - file = self.uniquePath(mdir, base) - shutil.copy2(path, file) - self.registerFile(base) - return os.path.basename(file) - - def registerFile(self, file): - "Add a single file to the media database." - if self.mediaDir(): - csum = self.mediaChecksum(os.path.join(self.mediaDir(), file)) - else: - csum = "" - self.deck.db.execute( - "insert or replace into media values (?, ?, ?)", - file, intTime(), csum) - - def registerText(self, string): - "Add all media in string to the media database." - for f in self.mediaFiles(string): - self.registerFile(f) - - def removeUnusedMedia(deck): - ids = deck.s.list("select id from media where size = 0") - for id in ids: - deck.s.statement("insert into mediaDeleted values (:id, :t)", - id=id, t=time.time()) - deck.s.statement("delete from media where size = 0") - - # Moving media - ########################################################################## - - def renameMediaDir(self, oldPath): - "Copy oldPath to our current media dir. " - assert os.path.exists(oldPath) - newPath = self.mediaDir(create=None) - # copytree doesn't want the dir to exist - try: - shutil.copytree(oldPath, newPath) - except: - # FIXME: should really remove everything in old dir instead of - # giving up - pass - - # Tools - ########################################################################## - - def mediaChecksum(self, path): - "Return checksum of PATH, or empty string." - try: - return checksum(open(path, "rb").read()) - except: - return "" - - def uniquePath(self, dir, base): +If the same name exists, compare checksums.""" + mdir = self.dir(create=True) # remove any dangerous characters - base = re.sub(r"[][<>:/\\&]", "", base) - # find a unique name + base = re.sub(r"[][<>:/\\&]", "", os.path.basename(opath)) + dst = os.path.join(mdir, base) + # if it doesn't exist, copy it directly + if not os.path.exists(dst): + shutil.copy2(opath, dst) + return base + # if it's identical, reuse + if self.filesIdentical(opath, dst): + return base + # otherwise, find a unique name (root, ext) = os.path.splitext(base) def repl(match): n = int(match.group(1)) return " (%d)" % (n+1) while True: - path = os.path.join(dir, root + ext) + path = os.path.join(mdir, root + ext) if not os.path.exists(path): break reg = " \((\d+)\)$" @@ -131,7 +75,14 @@ If a file with the same name exists, return a unique name.""" root = root + " (1)" else: root = re.sub(reg, repl, root) - return path + # copy and return + shutil.copy2(opath, path) + return os.path.basename(os.path.basename(path)) + + def filesIdentical(self, path1, path2): + "True if files are the same." + return (checksum(open(path1, "rb").read()) == + checksum(open(path2, "rb").read())) # String manipulation ########################################################################## @@ -163,25 +114,20 @@ If a file with the same name exists, return a unique name.""" # Rebuilding DB ########################################################################## - def rebuildMediaDir(self, delete=False): - mdir = self.mediaDir() + def check(self, delete=False): + "Return (missingFiles, unusedFiles)." + mdir = self.dir() if not mdir: return (0, 0) - # delete all media entries in database - self.deck.db.execute("delete from media") - # look through cards for media references + # generate card q/a and look through all references normrefs = {} def norm(s): if isinstance(s, unicode): return unicodedata.normalize('NFD', s) return s - # generate q/a and look through all references - for p in self.deck.renderQA(type="all"): - for type in ("q", "a"): - for f in self.mediaFiles(p[type]): - normrefs[norm(f)] = True - self.registerFile(f) - # find unused media + for f in self.allMedia(): + normrefs[norm(f)] = True + # loop through directory and find unused & missing media unused = [] for file in os.listdir(mdir): path = os.path.join(mdir, file) @@ -191,15 +137,25 @@ If a file with the same name exists, return a unique name.""" nfile = norm(file) if nfile not in normrefs: unused.append(file) + else: + del normrefs[nfile] # optionally delete if delete: for f in unused: path = os.path.join(mdir, f) os.unlink(path) - nohave = self.deck.db.list( - "select file from media where csum = ''") + nohave = normrefs.keys() return (nohave, unused) + def allMedia(self): + "Return a set of all referenced filenames." + files = set() + for p in self.deck.renderQA(type="all"): + for type in ("q", "a"): + for f in self.mediaFiles(p[type]): + files.add(f) + return files + # Download missing ########################################################################## @@ -207,7 +163,7 @@ If a file with the same name exists, return a unique name.""" urlbase = self.deck.getVar("mediaURL") if not urlbase: return None - mdir = self.deck.mediaDir(create=True) + mdir = self.deck.dir(create=True) missing = 0 grabbed = 0 for c, (f, sum) in enumerate(self.deck.db.all( @@ -233,7 +189,7 @@ If a file with the same name exists, return a unique name.""" ########################################################################## def downloadRemote(self): - mdir = self.deck.mediaDir(create=True) + mdir = self.deck.dir(create=True) refs = {} for (question, answer) in self.deck.db.all( "select question, answer from cards"): diff --git a/anki/storage.py b/anki/storage.py index b51b30daf..55cf57d4a 100644 --- a/anki/storage.py +++ b/anki/storage.py @@ -130,12 +130,6 @@ create table if not exists gconf ( conf text not null ); -create table if not exists media ( - file text primary key, - mod integer not null, - csum text not null -); - create table if not exists revlog ( time integer primary key, cid integer not null, @@ -182,9 +176,7 @@ create index if not exists ix_facts_mod on facts (mod); create index if not exists ix_cards_fid on cards (fid); -- revlog by card create index if not exists ix_revlog_cid on revlog (cid); --- media -create index if not exists ix_media_csum on media (csum); --- unique checking +-- field uniqueness check create index if not exists ix_fsums_fid on fsums (fid); create index if not exists ix_fsums_csum on fsums (csum); """) @@ -312,11 +304,7 @@ from facts order by created""") # media ########### - _moveTable(db, "media") - db.execute(""" -insert or ignore into media select filename, cast(created as int), -originalPath from media2""") - db.execute("drop table media2") + db.execute("drop table media") # models ########### diff --git a/tests/support/anki12.anki b/tests/support/anki12.anki index 24d027556..7fd7be092 100644 Binary files a/tests/support/anki12.anki and b/tests/support/anki12.anki differ diff --git a/tests/test_media.py b/tests/test_media.py index e9d147f38..83184f867 100644 --- a/tests/test_media.py +++ b/tests/test_media.py @@ -5,85 +5,57 @@ from anki import Deck from anki.utils import checksum from shared import getEmptyDeck, testDir -# uniqueness check -def test_unique(): - d = getEmptyDeck() - dir = tempfile.mkdtemp(prefix="anki") - # new file - n = "foo.jpg" - new = os.path.basename(d.media.uniquePath(dir, n)) - assert new == n - # duplicate file - open(os.path.join(dir, n), "w").write("hello") - n = "foo.jpg" - new = os.path.basename(d.media.uniquePath(dir, n)) - assert new == "foo (1).jpg" - # another duplicate - open(os.path.join(dir, "foo (1).jpg"), "w").write("hello") - n = "foo.jpg" - new = os.path.basename(d.media.uniquePath(dir, n)) - assert new == "foo (2).jpg" - # copying files to media folder -def test_copy(): +def test_add(): d = getEmptyDeck() dir = tempfile.mkdtemp(prefix="anki") path = os.path.join(dir, "foo.jpg") open(path, "w").write("hello") - # new file + # new file, should preserve name assert d.media.addFile(path) == "foo.jpg" - # dupe md5 - path = os.path.join(dir, "bar.jpg") - open(path, "w").write("hello") + # adding the same file again should not create a duplicate assert d.media.addFile(path) == "foo.jpg" - -# media db -def test_db(): - deck = getEmptyDeck() - dir = tempfile.mkdtemp(prefix="anki") - path = os.path.join(dir, "foo.jpg") - open(path, "w").write("hello") - # add a new fact that references it twice - f = deck.newFact() - f['Front'] = u"" - f['Back'] = u"back [sound:foo.jpg]" - deck.addFact(f) - # 1 entry in the media db, and no checksum - assert deck.db.scalar("select count() from media") == 1 - assert not deck.db.scalar("select group_concat(csum, '') from media") - # copy to media folder - path = deck.media.addFile(path) - # md5 should be set now - assert deck.db.scalar("select count() from media") == 1 - assert deck.db.scalar("select group_concat(csum, '') from media") - # detect file modifications - oldsum = deck.db.scalar("select csum from media") + # but if it has a different md5, it should open(path, "w").write("world") - deck.media.rebuildMediaDir() - newsum = deck.db.scalar("select csum from media") - assert newsum and newsum != oldsum - # delete underlying file and check db - os.unlink(path) - deck.media.rebuildMediaDir() - # md5 should be gone again - assert deck.db.scalar("select count() from media") == 1 - assert deck.db.scalar("select not csum from media") - # media db should pick up media defined via templates & bulk update - f['Back'] = u"bar.jpg" - f.flush() - # modify template & regenerate - assert deck.db.scalar("select count() from media") == 1 - m = deck.currentModel() - m.templates[0]['afmt']=u'' - m.flush() - deck.renderQA(type="all") - assert deck.db.scalar("select count() from media") == 2 + assert d.media.addFile(path) == "foo (1).jpg" + +def test_strings(): + d = getEmptyDeck() + mf = d.media.mediaFiles + assert mf("aoeu") == [] + assert mf("aoeuao") == ["foo.jpg"] + assert mf("aoeuao") == ["foo bar.jpg"] + assert mf("aoeuao") == ["foo.jpg"] + assert mf("aoeuao") == [ + "foo.jpg", "fo"] + assert mf("aou[sound:foo.mp3]aou") == ["foo.mp3"] + sp = d.media.stripMedia + assert sp("aoeu") == "aoeu" + assert sp("aoeu[sound:foo.mp3]aoeu") == "aoeuaoeu" + assert sp("aoeu") == "aoeu" + es = d.media.escapeImages + assert es("aoeu") == "aoeu" + assert es("") == "" + assert es('') == '' def test_deckIntegration(): - deck = getEmptyDeck() + d = getEmptyDeck() # create a media dir - deck.media.mediaDir(create=True) + d.media.dir(create=True) # put a file into it - file = unicode(os.path.join(testDir, "deck/fake.png")) - deck.media.addFile(file) - print "todo: check media copied on rename" + file = unicode(os.path.join(testDir, "support/fake.png")) + d.media.addFile(file) + # add a fact which references it + f = d.newFact() + f['Front'] = u"one"; f['Back'] = u"" + d.addFact(f) + # and one which references a non-existent file + f = d.newFact() + f['Front'] = u"one"; f['Back'] = u"" + d.addFact(f) + # and add another file which isn't used + open(os.path.join(d.media.dir(), "foo.jpg"), "wb").write("test") + # check media + ret = d.media.check() + assert ret[0] == ["fake2.png"] + assert ret[1] == ["foo.jpg"]