From 0c9672e7b8473646372529d095a87f1113f3b0ea Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Sat, 11 Dec 2010 00:51:48 +0900 Subject: [PATCH] rewrite media support - media is no longer hashed, and instead stored in the db using its original name - when adding media, its checksum is calculated and used to look for duplicates - duplicate filenames will result in a number tacked on the file - the size column is used to count card references to media. If media is referenced in a fact but not the question or answer, the count will be zero. - there is no guarantee media will be listed in the media db if it is unused on the question & answer - if rebuildMediaDir(delete=True) is called, then entries with zero references are deleted, along with any unused files in the media dir. - rebuildMediaDir() will update the internal checksums, and set the checksum to "" if a file can't be found - rebuildMediaDir() is a lot less destructive now, and will leave alone directories it finds in the media folder (but not look in them either) - rebuildMediaDir() returns more information about the state of media now - the online and mobile clients will need to make sure that when downloading media, entries with no checksum are non-fatal and should not abort the download process. - the ref count is updated every time the q/a is updated - so the db should be up to date after every add/edit/import - since we look for media on the q/a now, card templates like '<img src="{{{field}}}">' will work now - export original files is gone, as it is not needed anymore - move from per-model media URL to deckVar. downloadMissingMedia() uses this now. Deck subscriptions will have to be updated to share media another way. 
- pass deck in formatQA, as latex support is going to change --- anki/__init__.py | 2 +- anki/cards.py | 38 ++++- anki/deck.py | 74 ++++++--- anki/facts.py | 14 +- anki/media.py | 380 ++++++++++++++++++-------------------------- anki/models.py | 6 +- anki/sync.py | 4 +- tests/test_media.py | 106 ++++++++++++ tests/test_sync.py | 4 +- 9 files changed, 358 insertions(+), 270 deletions(-) create mode 100644 tests/test_media.py diff --git a/anki/__init__.py b/anki/__init__.py index 2eb277f2c..45032c9e6 100644 --- a/anki/__init__.py +++ b/anki/__init__.py @@ -29,7 +29,7 @@ Edit the card: fields = card.fact.model.fieldModels for field in fields: card.fact[field.name] = "newvalue" - card.fact.setModified(textChanged=True) + card.fact.setModified(textChanged=True, deck=deck) deck.setModified() Get all cards via ORM (slow): diff --git a/anki/cards.py b/anki/cards.py index 2516cf210..ebeaee963 100644 --- a/anki/cards.py +++ b/anki/cards.py @@ -13,6 +13,7 @@ from anki.db import * from anki.models import CardModel, Model, FieldModel, formatQA from anki.facts import Fact, factsTable, Field from anki.utils import parseTags, findTag, stripHTML, genID, hexifyID +from anki.media import updateMediaCount, mediaFiles # Cards ########################################################################## @@ -92,12 +93,37 @@ class Card(object): # for non-orm use self.cardModelId = cardModel.id self.ordinal = cardModel.ordinal - d = {} - for f in self.fact.model.fieldModels: - d[f.name] = (f.id, self.fact[f.name]) - qa = formatQA(None, fact.modelId, d, self.splitTags(), cardModel) - self.question = qa['question'] - self.answer = qa['answer'] + + def rebuildQA(self, deck, media=True): + # format qa + d = {} + for f in self.fact.model.fieldModels: + d[f.name] = (f.id, self.fact[f.name]) + qa = formatQA(None, self.fact.modelId, d, self.splitTags(), + self.cardModel, deck) + # find old media references + files = {} + for type in ("question", "answer"): + for f in mediaFiles(getattr(self, type) or 
""): + if f in files: + files[f] -= 1 + else: + files[f] = -1 + # update q/a + self.question = qa['question'] + self.answer = qa['answer'] + # determine media delta + for type in ("question", "answer"): + for f in mediaFiles(getattr(self, type)): + if f in files: + files[f] += 1 + else: + files[f] = 1 + # update media counts if we're attached to deck + if media: + for (f, cnt) in files.items(): + updateMediaCount(deck, f, cnt) + self.setModified() def setModified(self): self.modified = time.time() diff --git a/anki/deck.py b/anki/deck.py index 087832363..48f3c5632 100644 --- a/anki/deck.py +++ b/anki/deck.py @@ -26,8 +26,10 @@ from operator import itemgetter from itertools import groupby from anki.hooks import runHook, hookEmpty from anki.template import render +from anki.media import updateMediaCount, mediaFiles, \ + rebuildMediaDir -# ensure all the metadata in other files is loaded before proceeding +# ensure all the DB metadata in other files is loaded before proceeding import anki.models, anki.facts, anki.cards, anki.stats import anki.history, anki.media @@ -69,7 +71,7 @@ SEARCH_FIELD = 6 SEARCH_FIELD_EXISTS = 7 SEARCH_QA = 8 SEARCH_PHRASE_WB = 9 -DECK_VERSION = 58 +DECK_VERSION = 60 deckVarsTable = Table( 'deckVars', metadata, @@ -161,7 +163,7 @@ class Deck(object): self.lastSessionStart = 0 self.queueLimit = 200 # if most recent deck var not defined, make sure defaults are set - if not self.s.scalar("select 1 from deckVars where key = 'newSpacing'"): + if not self.s.scalar("select 1 from deckVars where key = 'mediaURL'"): self.setVarDefault("suspendLeeches", True) self.setVarDefault("leechFails", 16) self.setVarDefault("perDay", True) @@ -170,6 +172,7 @@ class Deck(object): self.setVarDefault("newInactive", self.suspended) self.setVarDefault("revInactive", self.suspended) self.setVarDefault("newSpacing", 60) + self.setVarDefault("mediaURL", "") self.updateCutoff() self.setupStandardScheduler() @@ -719,6 +722,7 @@ limit %s""" % (self.cramOrder, 
self.queueLimit))) card = anki.cards.Card() if not card.fromDB(self.s, id): return + card.deck = self card.genFuzz() card.startTimer() return card @@ -898,7 +902,7 @@ and type between 1 and 2""", tags = scard.fact.tags tags = addTags("Leech", tags) scard.fact.tags = canonifyTags(tags) - scard.fact.setModified(textChanged=True) + scard.fact.setModified(textChanged=True, deck=self) self.updateFactTags([scard.fact.id]) self.s.flush() self.s.expunge(scard) @@ -1425,7 +1429,6 @@ and due < :now""", now=time.time()) cards = [] self.s.save(fact) # update field cache - fact.setModified(True) self.factCount += 1 self.flushMod() isRandom = self.newCardOrder == NEW_CARDS_RANDOM @@ -1440,6 +1443,8 @@ and due < :now""", now=time.time()) card.combinedDue = due self.flushMod() cards.append(card) + # update card q/a + fact.setModified(True, self) self.updateFactTags([fact.id]) # this will call reset() which will update counts self.updatePriorities([c.id for c in cards]) @@ -1496,13 +1501,17 @@ where factId = :fid and cardModelId = :cmid""", fid=fact.id, cmid=cardModel.id) == 0: # enough for 10 card models assuming 0.00001 timer precision card = anki.cards.Card( - fact, cardModel, created=fact.created+0.0001*cardModel.ordinal) + fact, cardModel, + fact.created+0.0001*cardModel.ordinal) self.updateCardTags([card.id]) self.updatePriority(card) self.cardCount += 1 self.newCount += 1 ids.append(card.id) - self.setModified() + + if ids: + fact.setModified(textChanged=True, deck=self) + self.setModified() return ids def factIsInvalid(self, fact): @@ -1565,6 +1574,7 @@ where facts.id not in (select distinct factId from cards)""") for cardModel in cms: card = anki.cards.Card(fact, cardModel) cards.append(card) + fact.setModified(textChanged=True, deck=self, media=False) return cards def cloneFact(self, oldFact): @@ -1951,10 +1961,10 @@ and c.id in %s""" % ids2str(ids)) else: mod = "" # tags + cids = ids2str([x[0] for x in ids]) tags = dict([(x[0], x[1:]) for x in self.splitTagsList( - 
where="and cards.id in %s" % - ids2str([x[0] for x in ids]))]) + where="and cards.id in %s" % cids)]) facts = {} # fields for k, g in groupby(self.s.all(""" @@ -1968,9 +1978,33 @@ order by fields.factId""" % ids2str([x[2] for x in ids])), cms = {} for c in self.s.query(CardModel).all(): cms[c.id] = c - pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid]) + pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid], self) for (cid, cmid, fid, mid) in ids] if pend: + # find existing media references + files = {} + for txt in self.s.column0( + "select question || answer from cards where id in %s" % + cids): + for f in mediaFiles(txt): + if f in files: + files[f] -= 1 + else: + files[f] = -1 + # determine ref count delta + for p in pend: + for type in ("question", "answer"): + txt = p[type] + for f in mediaFiles(txt): + if f in files: + files[f] += 1 + else: + files[f] = 1 + # update references - this could be more efficient + for (f, cnt) in files.items(): + if not cnt: + continue + updateMediaCount(self, f, cnt) # update q/a self.s.execute(""" update cards set @@ -1979,7 +2013,8 @@ order by fields.factId""" % ids2str([x[2] for x in ids])), where id = :id""" % mod, pend) # update fields cache self.updateFieldCache(facts.keys()) - self.flushMod() + if dirty: + self.flushMod() def updateFieldCache(self, fids): "Add stripped HTML cache for sorting/searching." 
@@ -3018,7 +3053,7 @@ where key = :key""", key=key, value=value): if not self.tmpMediaDir and create: self.tmpMediaDir = tempfile.mkdtemp(prefix="anki") dir = self.tmpMediaDir - if not os.path.exists(dir): + if not dir or not os.path.exists(dir): return None # change to the current dir os.chdir(dir) @@ -3090,6 +3125,7 @@ Return new path, relative to media dir.""" self.s = None def setModified(self, newTime=None): + #import traceback; traceback.print_stack() self.modified = newTime or time.time() def flushMod(self): @@ -3878,6 +3914,7 @@ order by priority desc, due desc""") # we're opening a shared deck with no indices - we'll need # them if we want to rebuild the queue DeckStorage._addIndices(deck) + oldmod = deck.modified else: prog = False deck.path = path @@ -3996,10 +4033,6 @@ select filename, size, created, originalPath, description from media""") deck.s.statements(""" insert into media values ( :id, :filename, :size, :created, :originalPath, :description)""", h) - # rerun check - anki.media.rebuildMediaDir(deck, dirty=False) - # no need to track deleted media yet - deck.s.execute("delete from mediaDeleted") deck.version = 9 if deck.version < 10: deck.s.statement(""" @@ -4211,7 +4244,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""") deck.failedCardMax = 0 deck.version = 37 deck.s.commit() - # skip 38 if deck.version < 39: deck.reset() # manually suspend all suspended cards @@ -4232,7 +4264,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""") deck.s.statement("update models set features = ''") deck.version = 40 deck.s.commit() - # skip 41 if deck.version < 42: deck.version = 42 deck.s.commit() @@ -4270,7 +4301,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""") DeckStorage._addIndices(deck) deck.version = 50 deck.s.commit() - # skip 51 if deck.version < 52: dname = deck.name() sname = deck.syncName @@ -4329,6 +4359,11 @@ update cards set due = created, combinedDue = created where 
relativeDelay = 2""") deck.version = 58 deck.s.commit() + if deck.version < 60: + # rebuild the media db based on new format + rebuildMediaDir(deck, dirty=False) + deck.version = 60 + deck.s.commit() # executing a pragma here is very slow on large decks, so we store # our own record if not deck.getInt("pageSize") == 4096: @@ -4339,6 +4374,7 @@ where relativeDelay = 2""") deck.setVar("pageSize", 4096, mod=False) deck.s.commit() if prog: + assert deck.modified == oldmod deck.finishProgress() return deck _upgradeDeck = staticmethod(_upgradeDeck) diff --git a/anki/facts.py b/anki/facts.py index d95545eae..1f9df2abb 100644 --- a/anki/facts.py +++ b/anki/facts.py @@ -129,19 +129,15 @@ class Fact(object): def focusLost(self, field): runHook('fact.focusLost', self, field) - def setModified(self, textChanged=False): + def setModified(self, textChanged=False, deck=None, media=True): "Mark modified and update cards." self.modified = time.time() if textChanged: - d = {} - for f in self.model.fieldModels: - d[f.name] = (f.id, self[f.name]) - self.spaceUntil = stripHTMLMedia(u" ".join([x[1] for x in d.values()])) + assert deck + self.spaceUntil = stripHTMLMedia(u" ".join( + self.values())) for card in self.cards: - qa = formatQA(None, self.modelId, d, card.splitTags(), card.cardModel) - card.question = qa['question'] - card.answer = qa['answer'] - card.setModified() + card.rebuildQA(deck) # Fact deletions ########################################################################## diff --git a/anki/media.py b/anki/media.py index bd41ddbbc..3a4d9d47a 100644 --- a/anki/media.py +++ b/anki/media.py @@ -8,16 +8,13 @@ Media support """ __docformat__ = 'restructuredtext' -import os, stat, time, shutil, re, sys, urllib2 +import os, shutil, re, urllib2, time from anki.db import * -from anki.facts import Fact -from anki.utils import addTags, genID, ids2str, checksum +from anki.utils import checksum, genID from anki.lang import _ -regexps = (("(\[sound:([^]]+)\])", - "[sound:%s]"), - 
("(]+)[\"']? ?/?>)", - "")) +regexps = ("(\[sound:([^]]+)\])", + "(]+)[\"']? ?/?>)") # Tables ########################################################################## @@ -26,9 +23,15 @@ mediaTable = Table( 'media', metadata, Column('id', Integer, primary_key=True, nullable=False), Column('filename', UnicodeText, nullable=False), + # reused as reference count Column('size', Integer, nullable=False), + # treated as modification date, not creation date Column('created', Float, nullable=False), + # reused as md5sum. empty string if file doesn't exist on disk Column('originalPath', UnicodeText, nullable=False, default=u""), + # older versions stored original filename here, so we'll leave it for now + # in case we add a feature to rename media back to its original name. in + # the future we may want to zero this to save space Column('description', UnicodeText, nullable=False, default=u"")) class Media(object): @@ -42,258 +45,179 @@ mediaDeletedTable = Table( nullable=False), Column('deletedTime', Float, nullable=False)) -# Helper functions +# File handling ########################################################################## -def mediaFilename(path): - "Return checksum.ext for path" - new = checksum(open(path, "rb").read()) - ext = os.path.splitext(path)[1].lower() - return "%s%s" % (new, ext) - def copyToMedia(deck, path): """Copy PATH to MEDIADIR, and return new filename. -Update media table. If file already exists, don't copy.""" - origPath = path - description = os.path.splitext(os.path.basename(path))[0] - newBase = mediaFilename(path) - new = os.path.join(deck.mediaDir(create=True), newBase) - # copy if not existing - if not os.path.exists(new): - if new.lower() == path.lower(): - # case insensitive filesystems suck - os.rename(path, new) + +If a file with the same md5sum exists in the DB, return that. +If a file with the same name exists, return a unique name. 
+This does not modify the media table.""" + # see if have duplicate contents + newpath = deck.s.scalar( + "select filename from media where originalPath = :cs", + cs=checksum(open(path, "rb").read())) + # check if this filename already exists + if not newpath: + base = os.path.basename(path) + mdir = deck.mediaDir(create=True) + newpath = uniquePath(mdir, base) + shutil.copy2(path, newpath) + return os.path.basename(newpath) + +def uniquePath(dir, base): + # remove any dangerous characters + base = re.sub(r"[][<>:/\\]", "", base) + # find a unique name + (root, ext) = os.path.splitext(base) + def repl(match): + n = int(match.group(1)) + return " (%d)" % (n+1) + while True: + path = os.path.join(dir, root + ext) + if not os.path.exists(path): + break + reg = " \((\d+)\)$" + if not re.search(reg, root): + root = root + " (1)" else: - shutil.copy2(path, new) - newSize = os.stat(new)[stat.ST_SIZE] - if not deck.s.scalar( - "select 1 from media where filename = :f", - f=newBase): - # if the user has modified a hashed file, try to remember the old - # filename - old = deck.s.scalar( - "select originalPath from media where filename = :s", - s=os.path.basename(origPath)) - if old: - origPath = old - description = os.path.splitext(os.path.basename(origPath))[0] + root = re.sub(reg, repl, root) + return path + +# DB routines +########################################################################## + +def updateMediaCount(deck, file, count=1): + mdir = deck.mediaDir() + if deck.s.scalar( + "select 1 from media where filename = :file", file=file): + deck.s.statement( + "update media set size = size + :c, created = :t where filename = :file", + file=file, c=count, t=time.time()) + elif count > 0: try: - path = unicode(path, sys.getfilesystemencoding()) - except TypeError: - pass + sum = unicode( + checksum(open(os.path.join(mdir, file), "rb").read())) + except: + sum = u"" deck.s.statement(""" -insert into media (id, filename, size, created, originalPath, -description) 
-values (:id, :filename, :size, :created, :originalPath, -:description)""", - id=genID(), - filename=newBase, - size=newSize, - created=time.time(), - originalPath=origPath, - description=description) - deck.flushMod() - return newBase +insert into media (id, filename, size, created, originalPath, description) +values (:id, :file, :c, :mod, :sum, '')""", + id=genID(), file=file, c=count, mod=time.time(), + sum=sum) -def _modifyFields(deck, fieldsToUpdate, modifiedFacts, dirty): - factIds = ids2str(modifiedFacts.keys()) - if fieldsToUpdate: - deck.s.execute("update fields set value = :val where id = :id", - fieldsToUpdate) - deck.s.statement( - "update facts set modified = :time where id in %s" % - factIds, time=time.time()) - ids = deck.s.all("""select cards.id, cards.cardModelId, facts.id, -facts.modelId from cards, facts where -cards.factId = facts.id and facts.id in %s""" - % factIds) - deck.updateCardQACache(ids, dirty) - deck.flushMod() +def removeUnusedMedia(deck): + ids = deck.s.column0("select id from media where size = 0") + for id in ids: + deck.s.statement("insert into mediaDeleted values (:id, :t)", + id=id, t=time.time()) + deck.s.statement("delete from media where size = 0") +# String manipulation +########################################################################## -def mediaRefs(string): - "Return list of (fullMatch, filename, replacementString)." +def mediaFiles(string): l = [] - for (reg, repl) in regexps: + for reg in regexps: for (full, fname) in re.findall(reg, string): - l.append((full, fname, repl)) + l.append(fname) return l def stripMedia(txt): - for (reg, x) in regexps: + for reg in regexps: txt = re.sub(reg, "", txt) return txt # Rebuilding DB ########################################################################## -def rebuildMediaDir(deck, deleteRefs=False, dirty=True): - "Delete references to missing files, delete unused files." 
- localFiles = {} - modifiedFacts = {} - unmodifiedFacts = {} - renamedFiles = {} - existingFiles = {} - factsMissingMedia = {} - updateFields = [] - usedFiles = {} - unusedFileCount = 0 - missingFileCount = 0 - deck.mediaDir(create=True) - deck.startProgress(16, 0, _("Check Media DB")) - # rename all files to checksum versions, note non-renamed ones - deck.updateProgress(_("Checksum files...")) - files = os.listdir(unicode(deck.mediaDir())) - mod = len(files) / 10 - for c, oldBase in enumerate(files): - if mod and not c % mod: - deck.updateProgress() - if oldBase.startswith("latex-"): - continue - oldPath = os.path.join(deck.mediaDir(), oldBase) - if oldBase.startswith("."): - continue - if os.path.isdir(oldPath): - continue - newBase = copyToMedia(deck, oldPath) - if oldBase.lower() == newBase.lower(): - existingFiles[oldBase] = 1 - else: - renamedFiles[oldBase] = newBase - deck.updateProgress(value=10) - # now look through all fields, and update references to files - deck.updateProgress(_("Scan fields...")) - for (id, fid, val) in deck.s.all( - "select id, factId, value from fields"): - oldval = val - for (full, fname, repl) in mediaRefs(val): - if fname in renamedFiles: - # renamed - newBase = renamedFiles[fname] - val = re.sub(re.escape(full), repl % newBase, val) - usedFiles[newBase] = 1 - elif fname in existingFiles: - # used & current - usedFiles[fname] = 1 - else: - # missing - missingFileCount += 1 - if deleteRefs: - val = re.sub(re.escape(full), "", val) +def rebuildMediaDir(deck, delete=False, dirty=True): + deck.startProgress(title=_("Check Media DB")) + mdir = deck.mediaDir(create=True) + # set all ref counts to 0 + deck.s.statement("update media set size = 0") + # look through cards for media references + refs = {} + for (question, answer) in deck.s.all( + "select question, answer from cards"): + for txt in (question, answer): + for f in mediaFiles(txt): + if f in refs: + refs[f] += 1 else: - factsMissingMedia[fid] = 1 - if val != oldval: - 
updateFields.append({'id': id, 'val': val}) - modifiedFacts[fid] = 1 - else: - if fid not in factsMissingMedia: - unmodifiedFacts[fid] = 1 - # update modified fields - deck.updateProgress(_("Modify fields...")) - if modifiedFacts: - _modifyFields(deck, updateFields, modifiedFacts, dirty) - # fix tags - deck.updateProgress(_("Update tags...")) - if dirty: - deck.deleteTags(unmodifiedFacts.keys(), _("MediaMissing")) - if deleteRefs: - deck.deleteTags(modifiedFacts.keys(), _("MediaMissing")) - else: - deck.addTags(factsMissingMedia.keys(), _("MediaMissing")) - # build cache of db records - deck.updateProgress(_("Delete unused files...")) - mediaIds = dict(deck.s.all("select filename, id from media")) - # look through the media dir for any unused files, and delete - for f in os.listdir(unicode(deck.mediaDir())): - if f.startswith("."): + refs[f] = 1 + # update ref counts + for (file, count) in refs.items(): + updateMediaCount(deck, file, count) + # find unused media + unused = [] + for file in os.listdir(mdir): + path = os.path.join(mdir, file) + if not os.path.isfile(path): + # ignore directories continue - if f.startswith("latex-"): - continue - path = os.path.join(deck.mediaDir(), f) - if os.path.isdir(path): - shutil.rmtree(path) - continue - if f in usedFiles: - try: - del mediaIds[f] - except: - pass # case errors - else: + if file not in refs: + unused.append(file) + # optionally delete + if delete: + removeUnusedMedia(deck) + for f in unused: + path = os.path.join(mdir, f) os.unlink(path) - unusedFileCount += 1 - deck.updateProgress(_("Delete stale references...")) - for (fname, id) in mediaIds.items(): - # maybe delete from db - if id: - deck.s.statement("delete from media where id = :id", id=id) - deck.s.statement(""" -insert into mediaDeleted (mediaId, deletedTime) -values (:id, strftime('%s', 'now'))""", id=id) - # update deck and save - deck.flushMod() - deck.save() + # check md5s are up to date + update = [] + for (file, created, md5) in deck.s.all( + 
"select filename, created, originalPath from media"): + path = os.path.join(mdir, file) + if not os.path.exists(path): + if md5: + update.append({'f':file, 'sum':u"", 'c':time.time()}) + else: + sum = unicode( + checksum(open(os.path.join(mdir, file), "rb").read())) + if md5 != sum: + update.append({'f':file, 'sum':sum, 'c':time.time()}) + if update: + deck.s.statements(""" +update media set originalPath = :sum, created = :c where filename = :f""", + update) + # update deck and get return info + if dirty: + deck.flushMod() + have = deck.s.scalar("select count() from media where originalPath != ''") + nohave = deck.s.column0("select filename from media where originalPath = ''") deck.finishProgress() - return missingFileCount, unusedFileCount - len(renamedFiles) + return (have, nohave, unused) # Download missing ########################################################################## def downloadMissing(deck): - from anki.latex import renderLatex - urls = dict( - deck.s.all("select id, features from models where features != ''")) - if not urls: + urlbase = deck.getVar("mediaURL") + if not urlbase: return None mdir = deck.mediaDir(create=True) deck.startProgress() - missing = {} - for (id, fid, val, mid) in deck.s.all(""" -select fields.id, factId, value, modelId from fields, facts -where facts.id = fields.factId"""): - # add latex tags - val = renderLatex(deck, val, False) - for (full, fname, repl) in mediaRefs(val): - if not os.path.exists(os.path.join(mdir, fname)) and mid in urls: - missing[fname] = mid - for c, file in enumerate(missing.keys()): - deck.updateProgress(label=_("Downloading %(a)d of %(b)d...") % { - 'a': c, - 'b': len(missing), - }) - try: - path = urls[missing[file]] + file - url = urllib2.urlopen(path) - open(file, "wb").write(url.read()) - except: - deck.finishProgress() - return (False, path) + missing = 0 + grabbed = 0 + for c, (f, sum) in enumerate(deck.s.all( + "select filename, not not originalPath from media")): + path = 
os.path.join(mdir, f) + if not os.path.exists(path): + try: + rpath = urlbase + f + url = urllib2.urlopen(rpath) + open(f, "wb").write(url.read()) + grabbed += 1 + except: + if sum: + # the file is supposed to exist + deck.finishProgress() + return (False, rpath) + else: + # ignore and keep going + missing += 1 + deck.updateProgress(label=_("File %d...") % (grabbed+missing)) deck.finishProgress() - return (True, len(missing)) - -# Export original files -########################################################################## - -def exportOriginalFiles(deck): - deck.startProgress() - origDir = deck.mediaDir(create=True) - newDir = origDir.replace(".media", ".originals") - try: - os.mkdir(newDir) - except (IOError, OSError): - pass - cnt = 0 - for row in deck.s.all("select filename, originalPath from media"): - (fname, path) = row - base = os.path.basename(path) - if base == fname: - continue - cnt += 1 - deck.updateProgress(label="Exporting %s" % base) - old = os.path.join(origDir, fname) - new = os.path.join(newDir, base) - if os.path.exists(new): - new = re.sub("(.*)(\..*?)$", "\\1-%s\\2" % - os.path.splitext(fname)[0], new) - shutil.copy2(old, new) - deck.finishProgress() - return cnt + return (True, grabbed, missing) diff --git a/anki/models.py b/anki/models.py index e7abd57d3..6f08f0152 100644 --- a/anki/models.py +++ b/anki/models.py @@ -132,7 +132,7 @@ class CardModel(object): mapper(CardModel, cardModelsTable) -def formatQA(cid, mid, fact, tags, cm): +def formatQA(cid, mid, fact, tags, cm, deck): "Return a dict of {id, question, answer}" d = {'id': cid} fields = {} @@ -154,9 +154,9 @@ def formatQA(cid, mid, fact, tags, cm): # convert old style format = re.sub("%\((.+?)\)s", "{{\\1}}", format) # allow custom rendering functions & info - fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm) + fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm, deck) html = render(format, fields) - d[type] = runFilter("formatQA", html, type, 
cid, mid, fact, tags, cm) + d[type] = runFilter("formatQA", html, type, cid, mid, fact, tags, cm, deck) return d # Model table diff --git a/anki/sync.py b/anki/sync.py index 21cac830e..9e25f6ebb 100644 --- a/anki/sync.py +++ b/anki/sync.py @@ -35,7 +35,7 @@ from anki.stats import Stats, globalStats from anki.history import CardHistoryEntry from anki.stats import globalStats from anki.utils import ids2str, hexifyID, checksum -from anki.media import mediaRefs +from anki.media import mediaFiles from anki.lang import _ from hooks import runHook @@ -1204,7 +1204,7 @@ select %(c)s from cards where %(c)s like '%%" + f['Back'] = u"back [sound:foo.jpg]" + deck.addFact(f) + # 1 entry in the media db, with two references, and missing file + assert deck.s.scalar("select count() from media") == 1 + assert deck.s.scalar("select size from media") == 2 + assert deck.s.scalar("select not originalPath from media") + # copy to media folder & check db + path = m.copyToMedia(deck, path) + m.rebuildMediaDir(deck) + # md5 should be set now + assert deck.s.scalar("select count() from media") == 1 + assert deck.s.scalar("select size from media") == 2 + assert deck.s.scalar("select originalPath from media") + # edit the fact to remove a reference + f['Back'] = u"" + f.setModified(True, deck) + deck.s.flush() + assert deck.s.scalar("select count() from media") == 1 + assert deck.s.scalar("select size from media") == 1 + # remove the front reference too + f['Front'] = u"" + f.setModified(True, deck) + assert deck.s.scalar("select size from media") == 0 + # add the reference back + f['Front'] = u"" + f.setModified(True, deck) + assert deck.s.scalar("select size from media") == 1 + # detect file modifications + oldsum = deck.s.scalar("select originalPath from media") + open(path, "w").write("world") + m.rebuildMediaDir(deck) + newsum = deck.s.scalar("select originalPath from media") + assert newsum and newsum != oldsum + # delete underlying file and check db + os.unlink(path) + 
m.rebuildMediaDir(deck) + # md5 should be gone again + assert deck.s.scalar("select count() from media") == 1 + assert deck.s.scalar("select not originalPath from media") + # media db should pick up media defined via templates & bulk update + f['Back'] = u"bar.jpg" + f.setModified(True, deck) + deck.s.flush() + # modify template & regenerate + assert deck.s.scalar("select count() from media") == 1 + assert deck.s.scalar("select sum(size) from media") == 1 + deck.currentModel.cardModels[0].aformat=u'' + deck.updateCardsFromModel(deck.currentModel) + assert deck.s.scalar("select sum(size) from media") == 2 + assert deck.s.scalar("select count() from media") == 2 + deck.currentModel.cardModels[0].aformat=u'{{{Back}}}' + deck.updateCardsFromModel(deck.currentModel) + assert deck.s.scalar("select count() from media") == 2 + assert deck.s.scalar("select sum(size) from media") == 1 diff --git a/tests/test_sync.py b/tests/test_sync.py index 692b9ab7e..5b4f9553a 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -254,8 +254,8 @@ def test_localsync_media(): os.unlink(os.path.join(deck1media, "22161b29b0c18e068038021f54eee1ee.png")) rebuildMediaDir(deck1) client.sync() - assert deck1.s.scalar("select count(1) from media") == 2 - assert deck2.s.scalar("select count(1) from media") == 2 + assert deck1.s.scalar("select count(1) from media") == 3 + assert deck2.s.scalar("select count(1) from media") == 3 # One way syncing ##########################################################################