rewrite media support

- media is no longer hashed, and instead stored in the db using its original
  name
- when adding media, its checksum is calculated and used to look for
  duplicates
- duplicate filenames will result in a number tacked on the file
- the size column is used to count card references to media. If media is
  referenced in a fact but not the question or answer, the count will be zero.
- there is no guarantee media will be listed in the media db if it is unused
  on the question & answer
- if rebuildMediaDir() is called with delete=True, entries with zero
  references are deleted, along with any unused files in the media dir.
- rebuildMediaDir() will update the internal checksums, and set the checksum
  to "" if a file can't be found
- rebuildMediaDir() is a lot less destructive now, and will leave alone
  directories it finds in the media folder (but not look in them either)
- rebuildMediaDir() returns more information about the state of media now
- the online and mobile clients will need to make sure that when
  downloading media, entries with no checksum are non-fatal and should not
  abort the download process.
- the ref count is updated every time the q/a is updated - so the db should be
  up to date after every add/edit/import
- since we look for media on the q/a now, card templates like '<img
  src="{{{field}}}">' will work now
- export original files is gone, as it is not needed anymore
- move from per-model media URL to deckVar. downloadMissingMedia() uses this
  now. Deck subscriptions will have to be updated to share media another way.
- pass deck in formatQA, as latex support is going to change
This commit is contained in:
Damien Elmes 2010-12-11 00:51:48 +09:00
parent c4c9847028
commit 0c9672e7b8
9 changed files with 358 additions and 270 deletions

View file

@ -29,7 +29,7 @@ Edit the card:
fields = card.fact.model.fieldModels fields = card.fact.model.fieldModels
for field in fields: for field in fields:
card.fact[field.name] = "newvalue" card.fact[field.name] = "newvalue"
card.fact.setModified(textChanged=True) card.fact.setModified(textChanged=True, deck=deck)
deck.setModified() deck.setModified()
Get all cards via ORM (slow): Get all cards via ORM (slow):

View file

@ -13,6 +13,7 @@ from anki.db import *
from anki.models import CardModel, Model, FieldModel, formatQA from anki.models import CardModel, Model, FieldModel, formatQA
from anki.facts import Fact, factsTable, Field from anki.facts import Fact, factsTable, Field
from anki.utils import parseTags, findTag, stripHTML, genID, hexifyID from anki.utils import parseTags, findTag, stripHTML, genID, hexifyID
from anki.media import updateMediaCount, mediaFiles
# Cards # Cards
########################################################################## ##########################################################################
@ -92,12 +93,37 @@ class Card(object):
# for non-orm use # for non-orm use
self.cardModelId = cardModel.id self.cardModelId = cardModel.id
self.ordinal = cardModel.ordinal self.ordinal = cardModel.ordinal
d = {}
for f in self.fact.model.fieldModels: def rebuildQA(self, deck, media=True):
d[f.name] = (f.id, self.fact[f.name]) # format qa
qa = formatQA(None, fact.modelId, d, self.splitTags(), cardModel) d = {}
self.question = qa['question'] for f in self.fact.model.fieldModels:
self.answer = qa['answer'] d[f.name] = (f.id, self.fact[f.name])
qa = formatQA(None, self.fact.modelId, d, self.splitTags(),
self.cardModel, deck)
# find old media references
files = {}
for type in ("question", "answer"):
for f in mediaFiles(getattr(self, type) or ""):
if f in files:
files[f] -= 1
else:
files[f] = -1
# update q/a
self.question = qa['question']
self.answer = qa['answer']
# determine media delta
for type in ("question", "answer"):
for f in mediaFiles(getattr(self, type)):
if f in files:
files[f] += 1
else:
files[f] = 1
# update media counts if we're attached to deck
if media:
for (f, cnt) in files.items():
updateMediaCount(deck, f, cnt)
self.setModified()
def setModified(self): def setModified(self):
self.modified = time.time() self.modified = time.time()

View file

@ -26,8 +26,10 @@ from operator import itemgetter
from itertools import groupby from itertools import groupby
from anki.hooks import runHook, hookEmpty from anki.hooks import runHook, hookEmpty
from anki.template import render from anki.template import render
from anki.media import updateMediaCount, mediaFiles, \
rebuildMediaDir
# ensure all the metadata in other files is loaded before proceeding # ensure all the DB metadata in other files is loaded before proceeding
import anki.models, anki.facts, anki.cards, anki.stats import anki.models, anki.facts, anki.cards, anki.stats
import anki.history, anki.media import anki.history, anki.media
@ -69,7 +71,7 @@ SEARCH_FIELD = 6
SEARCH_FIELD_EXISTS = 7 SEARCH_FIELD_EXISTS = 7
SEARCH_QA = 8 SEARCH_QA = 8
SEARCH_PHRASE_WB = 9 SEARCH_PHRASE_WB = 9
DECK_VERSION = 58 DECK_VERSION = 60
deckVarsTable = Table( deckVarsTable = Table(
'deckVars', metadata, 'deckVars', metadata,
@ -161,7 +163,7 @@ class Deck(object):
self.lastSessionStart = 0 self.lastSessionStart = 0
self.queueLimit = 200 self.queueLimit = 200
# if most recent deck var not defined, make sure defaults are set # if most recent deck var not defined, make sure defaults are set
if not self.s.scalar("select 1 from deckVars where key = 'newSpacing'"): if not self.s.scalar("select 1 from deckVars where key = 'mediaURL'"):
self.setVarDefault("suspendLeeches", True) self.setVarDefault("suspendLeeches", True)
self.setVarDefault("leechFails", 16) self.setVarDefault("leechFails", 16)
self.setVarDefault("perDay", True) self.setVarDefault("perDay", True)
@ -170,6 +172,7 @@ class Deck(object):
self.setVarDefault("newInactive", self.suspended) self.setVarDefault("newInactive", self.suspended)
self.setVarDefault("revInactive", self.suspended) self.setVarDefault("revInactive", self.suspended)
self.setVarDefault("newSpacing", 60) self.setVarDefault("newSpacing", 60)
self.setVarDefault("mediaURL", "")
self.updateCutoff() self.updateCutoff()
self.setupStandardScheduler() self.setupStandardScheduler()
@ -719,6 +722,7 @@ limit %s""" % (self.cramOrder, self.queueLimit)))
card = anki.cards.Card() card = anki.cards.Card()
if not card.fromDB(self.s, id): if not card.fromDB(self.s, id):
return return
card.deck = self
card.genFuzz() card.genFuzz()
card.startTimer() card.startTimer()
return card return card
@ -898,7 +902,7 @@ and type between 1 and 2""",
tags = scard.fact.tags tags = scard.fact.tags
tags = addTags("Leech", tags) tags = addTags("Leech", tags)
scard.fact.tags = canonifyTags(tags) scard.fact.tags = canonifyTags(tags)
scard.fact.setModified(textChanged=True) scard.fact.setModified(textChanged=True, deck=self)
self.updateFactTags([scard.fact.id]) self.updateFactTags([scard.fact.id])
self.s.flush() self.s.flush()
self.s.expunge(scard) self.s.expunge(scard)
@ -1425,7 +1429,6 @@ and due < :now""", now=time.time())
cards = [] cards = []
self.s.save(fact) self.s.save(fact)
# update field cache # update field cache
fact.setModified(True)
self.factCount += 1 self.factCount += 1
self.flushMod() self.flushMod()
isRandom = self.newCardOrder == NEW_CARDS_RANDOM isRandom = self.newCardOrder == NEW_CARDS_RANDOM
@ -1440,6 +1443,8 @@ and due < :now""", now=time.time())
card.combinedDue = due card.combinedDue = due
self.flushMod() self.flushMod()
cards.append(card) cards.append(card)
# update card q/a
fact.setModified(True, self)
self.updateFactTags([fact.id]) self.updateFactTags([fact.id])
# this will call reset() which will update counts # this will call reset() which will update counts
self.updatePriorities([c.id for c in cards]) self.updatePriorities([c.id for c in cards])
@ -1496,13 +1501,17 @@ where factId = :fid and cardModelId = :cmid""",
fid=fact.id, cmid=cardModel.id) == 0: fid=fact.id, cmid=cardModel.id) == 0:
# enough for 10 card models assuming 0.00001 timer precision # enough for 10 card models assuming 0.00001 timer precision
card = anki.cards.Card( card = anki.cards.Card(
fact, cardModel, created=fact.created+0.0001*cardModel.ordinal) fact, cardModel,
fact.created+0.0001*cardModel.ordinal)
self.updateCardTags([card.id]) self.updateCardTags([card.id])
self.updatePriority(card) self.updatePriority(card)
self.cardCount += 1 self.cardCount += 1
self.newCount += 1 self.newCount += 1
ids.append(card.id) ids.append(card.id)
self.setModified()
if ids:
fact.setModified(textChanged=True, deck=self)
self.setModified()
return ids return ids
def factIsInvalid(self, fact): def factIsInvalid(self, fact):
@ -1565,6 +1574,7 @@ where facts.id not in (select distinct factId from cards)""")
for cardModel in cms: for cardModel in cms:
card = anki.cards.Card(fact, cardModel) card = anki.cards.Card(fact, cardModel)
cards.append(card) cards.append(card)
fact.setModified(textChanged=True, deck=self, media=False)
return cards return cards
def cloneFact(self, oldFact): def cloneFact(self, oldFact):
@ -1951,10 +1961,10 @@ and c.id in %s""" % ids2str(ids))
else: else:
mod = "" mod = ""
# tags # tags
cids = ids2str([x[0] for x in ids])
tags = dict([(x[0], x[1:]) for x in tags = dict([(x[0], x[1:]) for x in
self.splitTagsList( self.splitTagsList(
where="and cards.id in %s" % where="and cards.id in %s" % cids)])
ids2str([x[0] for x in ids]))])
facts = {} facts = {}
# fields # fields
for k, g in groupby(self.s.all(""" for k, g in groupby(self.s.all("""
@ -1968,9 +1978,33 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
cms = {} cms = {}
for c in self.s.query(CardModel).all(): for c in self.s.query(CardModel).all():
cms[c.id] = c cms[c.id] = c
pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid]) pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid], self)
for (cid, cmid, fid, mid) in ids] for (cid, cmid, fid, mid) in ids]
if pend: if pend:
# find existing media references
files = {}
for txt in self.s.column0(
"select question || answer from cards where id in %s" %
cids):
for f in mediaFiles(txt):
if f in files:
files[f] -= 1
else:
files[f] = -1
# determine ref count delta
for p in pend:
for type in ("question", "answer"):
txt = p[type]
for f in mediaFiles(txt):
if f in files:
files[f] += 1
else:
files[f] = 1
# update references - this could be more efficient
for (f, cnt) in files.items():
if not cnt:
continue
updateMediaCount(self, f, cnt)
# update q/a # update q/a
self.s.execute(""" self.s.execute("""
update cards set update cards set
@ -1979,7 +2013,8 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
where id = :id""" % mod, pend) where id = :id""" % mod, pend)
# update fields cache # update fields cache
self.updateFieldCache(facts.keys()) self.updateFieldCache(facts.keys())
self.flushMod() if dirty:
self.flushMod()
def updateFieldCache(self, fids): def updateFieldCache(self, fids):
"Add stripped HTML cache for sorting/searching." "Add stripped HTML cache for sorting/searching."
@ -3018,7 +3053,7 @@ where key = :key""", key=key, value=value):
if not self.tmpMediaDir and create: if not self.tmpMediaDir and create:
self.tmpMediaDir = tempfile.mkdtemp(prefix="anki") self.tmpMediaDir = tempfile.mkdtemp(prefix="anki")
dir = self.tmpMediaDir dir = self.tmpMediaDir
if not os.path.exists(dir): if not dir or not os.path.exists(dir):
return None return None
# change to the current dir # change to the current dir
os.chdir(dir) os.chdir(dir)
@ -3090,6 +3125,7 @@ Return new path, relative to media dir."""
self.s = None self.s = None
def setModified(self, newTime=None): def setModified(self, newTime=None):
#import traceback; traceback.print_stack()
self.modified = newTime or time.time() self.modified = newTime or time.time()
def flushMod(self): def flushMod(self):
@ -3878,6 +3914,7 @@ order by priority desc, due desc""")
# we're opening a shared deck with no indices - we'll need # we're opening a shared deck with no indices - we'll need
# them if we want to rebuild the queue # them if we want to rebuild the queue
DeckStorage._addIndices(deck) DeckStorage._addIndices(deck)
oldmod = deck.modified
else: else:
prog = False prog = False
deck.path = path deck.path = path
@ -3996,10 +4033,6 @@ select filename, size, created, originalPath, description from media""")
deck.s.statements(""" deck.s.statements("""
insert into media values ( insert into media values (
:id, :filename, :size, :created, :originalPath, :description)""", h) :id, :filename, :size, :created, :originalPath, :description)""", h)
# rerun check
anki.media.rebuildMediaDir(deck, dirty=False)
# no need to track deleted media yet
deck.s.execute("delete from mediaDeleted")
deck.version = 9 deck.version = 9
if deck.version < 10: if deck.version < 10:
deck.s.statement(""" deck.s.statement("""
@ -4211,7 +4244,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
deck.failedCardMax = 0 deck.failedCardMax = 0
deck.version = 37 deck.version = 37
deck.s.commit() deck.s.commit()
# skip 38
if deck.version < 39: if deck.version < 39:
deck.reset() deck.reset()
# manually suspend all suspended cards # manually suspend all suspended cards
@ -4232,7 +4264,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
deck.s.statement("update models set features = ''") deck.s.statement("update models set features = ''")
deck.version = 40 deck.version = 40
deck.s.commit() deck.s.commit()
# skip 41
if deck.version < 42: if deck.version < 42:
deck.version = 42 deck.version = 42
deck.s.commit() deck.s.commit()
@ -4270,7 +4301,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
DeckStorage._addIndices(deck) DeckStorage._addIndices(deck)
deck.version = 50 deck.version = 50
deck.s.commit() deck.s.commit()
# skip 51
if deck.version < 52: if deck.version < 52:
dname = deck.name() dname = deck.name()
sname = deck.syncName sname = deck.syncName
@ -4329,6 +4359,11 @@ update cards set due = created, combinedDue = created
where relativeDelay = 2""") where relativeDelay = 2""")
deck.version = 58 deck.version = 58
deck.s.commit() deck.s.commit()
if deck.version < 60:
# rebuild the media db based on new format
rebuildMediaDir(deck, dirty=False)
deck.version = 60
deck.s.commit()
# executing a pragma here is very slow on large decks, so we store # executing a pragma here is very slow on large decks, so we store
# our own record # our own record
if not deck.getInt("pageSize") == 4096: if not deck.getInt("pageSize") == 4096:
@ -4339,6 +4374,7 @@ where relativeDelay = 2""")
deck.setVar("pageSize", 4096, mod=False) deck.setVar("pageSize", 4096, mod=False)
deck.s.commit() deck.s.commit()
if prog: if prog:
assert deck.modified == oldmod
deck.finishProgress() deck.finishProgress()
return deck return deck
_upgradeDeck = staticmethod(_upgradeDeck) _upgradeDeck = staticmethod(_upgradeDeck)

View file

@ -129,19 +129,15 @@ class Fact(object):
def focusLost(self, field): def focusLost(self, field):
runHook('fact.focusLost', self, field) runHook('fact.focusLost', self, field)
def setModified(self, textChanged=False): def setModified(self, textChanged=False, deck=None, media=True):
"Mark modified and update cards." "Mark modified and update cards."
self.modified = time.time() self.modified = time.time()
if textChanged: if textChanged:
d = {} assert deck
for f in self.model.fieldModels: self.spaceUntil = stripHTMLMedia(u" ".join(
d[f.name] = (f.id, self[f.name]) self.values()))
self.spaceUntil = stripHTMLMedia(u" ".join([x[1] for x in d.values()]))
for card in self.cards: for card in self.cards:
qa = formatQA(None, self.modelId, d, card.splitTags(), card.cardModel) card.rebuildQA(deck)
card.question = qa['question']
card.answer = qa['answer']
card.setModified()
# Fact deletions # Fact deletions
########################################################################## ##########################################################################

View file

@ -8,16 +8,13 @@ Media support
""" """
__docformat__ = 'restructuredtext' __docformat__ = 'restructuredtext'
import os, stat, time, shutil, re, sys, urllib2 import os, shutil, re, urllib2, time
from anki.db import * from anki.db import *
from anki.facts import Fact from anki.utils import checksum, genID
from anki.utils import addTags, genID, ids2str, checksum
from anki.lang import _ from anki.lang import _
regexps = (("(\[sound:([^]]+)\])", regexps = ("(\[sound:([^]]+)\])",
"[sound:%s]"), "(<img src=[\"']?([^\"'>]+)[\"']? ?/?>)")
("(<img src=[\"']?([^\"'>]+)[\"']? ?/?>)",
"<img src=\"%s\">"))
# Tables # Tables
########################################################################## ##########################################################################
@ -26,9 +23,15 @@ mediaTable = Table(
'media', metadata, 'media', metadata,
Column('id', Integer, primary_key=True, nullable=False), Column('id', Integer, primary_key=True, nullable=False),
Column('filename', UnicodeText, nullable=False), Column('filename', UnicodeText, nullable=False),
# reused as reference count
Column('size', Integer, nullable=False), Column('size', Integer, nullable=False),
# treated as modification date, not creation date
Column('created', Float, nullable=False), Column('created', Float, nullable=False),
# reused as md5sum. empty string if file doesn't exist on disk
Column('originalPath', UnicodeText, nullable=False, default=u""), Column('originalPath', UnicodeText, nullable=False, default=u""),
# older versions stored original filename here, so we'll leave it for now
# in case we add a feature to rename media back to its original name. in
# the future we may want to zero this to save space
Column('description', UnicodeText, nullable=False, default=u"")) Column('description', UnicodeText, nullable=False, default=u""))
class Media(object): class Media(object):
@ -42,258 +45,179 @@ mediaDeletedTable = Table(
nullable=False), nullable=False),
Column('deletedTime', Float, nullable=False)) Column('deletedTime', Float, nullable=False))
# Helper functions # File handling
########################################################################## ##########################################################################
def mediaFilename(path):
"Return checksum.ext for path"
new = checksum(open(path, "rb").read())
ext = os.path.splitext(path)[1].lower()
return "%s%s" % (new, ext)
def copyToMedia(deck, path): def copyToMedia(deck, path):
"""Copy PATH to MEDIADIR, and return new filename. """Copy PATH to MEDIADIR, and return new filename.
Update media table. If file already exists, don't copy."""
origPath = path If a file with the same md5sum exists in the DB, return that.
description = os.path.splitext(os.path.basename(path))[0] If a file with the same name exists, return a unique name.
newBase = mediaFilename(path) This does not modify the media table."""
new = os.path.join(deck.mediaDir(create=True), newBase) # see if have duplicate contents
# copy if not existing newpath = deck.s.scalar(
if not os.path.exists(new): "select filename from media where originalPath = :cs",
if new.lower() == path.lower(): cs=checksum(open(path, "rb").read()))
# case insensitive filesystems suck # check if this filename already exists
os.rename(path, new) if not newpath:
base = os.path.basename(path)
mdir = deck.mediaDir(create=True)
newpath = uniquePath(mdir, base)
shutil.copy2(path, newpath)
return os.path.basename(newpath)
def uniquePath(dir, base):
# remove any dangerous characters
base = re.sub(r"[][<>:/\\]", "", base)
# find a unique name
(root, ext) = os.path.splitext(base)
def repl(match):
n = int(match.group(1))
return " (%d)" % (n+1)
while True:
path = os.path.join(dir, root + ext)
if not os.path.exists(path):
break
reg = " \((\d+)\)$"
if not re.search(reg, root):
root = root + " (1)"
else: else:
shutil.copy2(path, new) root = re.sub(reg, repl, root)
newSize = os.stat(new)[stat.ST_SIZE] return path
if not deck.s.scalar(
"select 1 from media where filename = :f", # DB routines
f=newBase): ##########################################################################
# if the user has modified a hashed file, try to remember the old
# filename def updateMediaCount(deck, file, count=1):
old = deck.s.scalar( mdir = deck.mediaDir()
"select originalPath from media where filename = :s", if deck.s.scalar(
s=os.path.basename(origPath)) "select 1 from media where filename = :file", file=file):
if old: deck.s.statement(
origPath = old "update media set size = size + :c, created = :t where filename = :file",
description = os.path.splitext(os.path.basename(origPath))[0] file=file, c=count, t=time.time())
elif count > 0:
try: try:
path = unicode(path, sys.getfilesystemencoding()) sum = unicode(
except TypeError: checksum(open(os.path.join(mdir, file), "rb").read()))
pass except:
sum = u""
deck.s.statement(""" deck.s.statement("""
insert into media (id, filename, size, created, originalPath, insert into media (id, filename, size, created, originalPath, description)
description) values (:id, :file, :c, :mod, :sum, '')""",
values (:id, :filename, :size, :created, :originalPath, id=genID(), file=file, c=count, mod=time.time(),
:description)""", sum=sum)
id=genID(),
filename=newBase,
size=newSize,
created=time.time(),
originalPath=origPath,
description=description)
deck.flushMod()
return newBase
def _modifyFields(deck, fieldsToUpdate, modifiedFacts, dirty): def removeUnusedMedia(deck):
factIds = ids2str(modifiedFacts.keys()) ids = deck.s.column0("select id from media where size = 0")
if fieldsToUpdate: for id in ids:
deck.s.execute("update fields set value = :val where id = :id", deck.s.statement("insert into mediaDeleted values (:id, :t)",
fieldsToUpdate) id=id, t=time.time())
deck.s.statement( deck.s.statement("delete from media where size = 0")
"update facts set modified = :time where id in %s" %
factIds, time=time.time())
ids = deck.s.all("""select cards.id, cards.cardModelId, facts.id,
facts.modelId from cards, facts where
cards.factId = facts.id and facts.id in %s"""
% factIds)
deck.updateCardQACache(ids, dirty)
deck.flushMod()
# String manipulation
##########################################################################
def mediaRefs(string): def mediaFiles(string):
"Return list of (fullMatch, filename, replacementString)."
l = [] l = []
for (reg, repl) in regexps: for reg in regexps:
for (full, fname) in re.findall(reg, string): for (full, fname) in re.findall(reg, string):
l.append((full, fname, repl)) l.append(fname)
return l return l
def stripMedia(txt): def stripMedia(txt):
for (reg, x) in regexps: for reg in regexps:
txt = re.sub(reg, "", txt) txt = re.sub(reg, "", txt)
return txt return txt
# Rebuilding DB # Rebuilding DB
########################################################################## ##########################################################################
def rebuildMediaDir(deck, deleteRefs=False, dirty=True): def rebuildMediaDir(deck, delete=False, dirty=True):
"Delete references to missing files, delete unused files." deck.startProgress(title=_("Check Media DB"))
localFiles = {} mdir = deck.mediaDir(create=True)
modifiedFacts = {} # set all ref counts to 0
unmodifiedFacts = {} deck.s.statement("update media set size = 0")
renamedFiles = {} # look through cards for media references
existingFiles = {} refs = {}
factsMissingMedia = {} for (question, answer) in deck.s.all(
updateFields = [] "select question, answer from cards"):
usedFiles = {} for txt in (question, answer):
unusedFileCount = 0 for f in mediaFiles(txt):
missingFileCount = 0 if f in refs:
deck.mediaDir(create=True) refs[f] += 1
deck.startProgress(16, 0, _("Check Media DB"))
# rename all files to checksum versions, note non-renamed ones
deck.updateProgress(_("Checksum files..."))
files = os.listdir(unicode(deck.mediaDir()))
mod = len(files) / 10
for c, oldBase in enumerate(files):
if mod and not c % mod:
deck.updateProgress()
if oldBase.startswith("latex-"):
continue
oldPath = os.path.join(deck.mediaDir(), oldBase)
if oldBase.startswith("."):
continue
if os.path.isdir(oldPath):
continue
newBase = copyToMedia(deck, oldPath)
if oldBase.lower() == newBase.lower():
existingFiles[oldBase] = 1
else:
renamedFiles[oldBase] = newBase
deck.updateProgress(value=10)
# now look through all fields, and update references to files
deck.updateProgress(_("Scan fields..."))
for (id, fid, val) in deck.s.all(
"select id, factId, value from fields"):
oldval = val
for (full, fname, repl) in mediaRefs(val):
if fname in renamedFiles:
# renamed
newBase = renamedFiles[fname]
val = re.sub(re.escape(full), repl % newBase, val)
usedFiles[newBase] = 1
elif fname in existingFiles:
# used & current
usedFiles[fname] = 1
else:
# missing
missingFileCount += 1
if deleteRefs:
val = re.sub(re.escape(full), "", val)
else: else:
factsMissingMedia[fid] = 1 refs[f] = 1
if val != oldval: # update ref counts
updateFields.append({'id': id, 'val': val}) for (file, count) in refs.items():
modifiedFacts[fid] = 1 updateMediaCount(deck, file, count)
else: # find unused media
if fid not in factsMissingMedia: unused = []
unmodifiedFacts[fid] = 1 for file in os.listdir(mdir):
# update modified fields path = os.path.join(mdir, file)
deck.updateProgress(_("Modify fields...")) if not os.path.isfile(path):
if modifiedFacts: # ignore directories
_modifyFields(deck, updateFields, modifiedFacts, dirty)
# fix tags
deck.updateProgress(_("Update tags..."))
if dirty:
deck.deleteTags(unmodifiedFacts.keys(), _("MediaMissing"))
if deleteRefs:
deck.deleteTags(modifiedFacts.keys(), _("MediaMissing"))
else:
deck.addTags(factsMissingMedia.keys(), _("MediaMissing"))
# build cache of db records
deck.updateProgress(_("Delete unused files..."))
mediaIds = dict(deck.s.all("select filename, id from media"))
# look through the media dir for any unused files, and delete
for f in os.listdir(unicode(deck.mediaDir())):
if f.startswith("."):
continue continue
if f.startswith("latex-"): if file not in refs:
continue unused.append(file)
path = os.path.join(deck.mediaDir(), f) # optionally delete
if os.path.isdir(path): if delete:
shutil.rmtree(path) removeUnusedMedia(deck)
continue for f in unused:
if f in usedFiles: path = os.path.join(mdir, f)
try:
del mediaIds[f]
except:
pass # case errors
else:
os.unlink(path) os.unlink(path)
unusedFileCount += 1 # check md5s are up to date
deck.updateProgress(_("Delete stale references...")) update = []
for (fname, id) in mediaIds.items(): for (file, created, md5) in deck.s.all(
# maybe delete from db "select filename, created, originalPath from media"):
if id: path = os.path.join(mdir, file)
deck.s.statement("delete from media where id = :id", id=id) if not os.path.exists(path):
deck.s.statement(""" if md5:
insert into mediaDeleted (mediaId, deletedTime) update.append({'f':file, 'sum':u"", 'c':time.time()})
values (:id, strftime('%s', 'now'))""", id=id) else:
# update deck and save sum = unicode(
deck.flushMod() checksum(open(os.path.join(mdir, file), "rb").read()))
deck.save() if md5 != sum:
update.append({'f':file, 'sum':sum, 'c':time.time()})
if update:
deck.s.statements("""
update media set originalPath = :sum, created = :c where filename = :f""",
update)
# update deck and get return info
if dirty:
deck.flushMod()
have = deck.s.scalar("select count() from media where originalPath != ''")
nohave = deck.s.column0("select filename from media where originalPath = ''")
deck.finishProgress() deck.finishProgress()
return missingFileCount, unusedFileCount - len(renamedFiles) return (have, nohave, unused)
# Download missing # Download missing
########################################################################## ##########################################################################
def downloadMissing(deck): def downloadMissing(deck):
from anki.latex import renderLatex urlbase = deck.getVar("mediaURL")
urls = dict( if not urlbase:
deck.s.all("select id, features from models where features != ''"))
if not urls:
return None return None
mdir = deck.mediaDir(create=True) mdir = deck.mediaDir(create=True)
deck.startProgress() deck.startProgress()
missing = {} missing = 0
for (id, fid, val, mid) in deck.s.all(""" grabbed = 0
select fields.id, factId, value, modelId from fields, facts for c, (f, sum) in enumerate(deck.s.all(
where facts.id = fields.factId"""): "select filename, not not originalPath from media")):
# add latex tags path = os.path.join(mdir, f)
val = renderLatex(deck, val, False) if not os.path.exists(path):
for (full, fname, repl) in mediaRefs(val): try:
if not os.path.exists(os.path.join(mdir, fname)) and mid in urls: rpath = urlbase + f
missing[fname] = mid url = urllib2.urlopen(rpath)
for c, file in enumerate(missing.keys()): open(f, "wb").write(url.read())
deck.updateProgress(label=_("Downloading %(a)d of %(b)d...") % { grabbed += 1
'a': c, except:
'b': len(missing), if sum:
}) # the file is supposed to exist
try: deck.finishProgress()
path = urls[missing[file]] + file return (False, rpath)
url = urllib2.urlopen(path) else:
open(file, "wb").write(url.read()) # ignore and keep going
except: missing += 1
deck.finishProgress() deck.updateProgress(label=_("File %d...") % (grabbed+missing))
return (False, path)
deck.finishProgress() deck.finishProgress()
return (True, len(missing)) return (True, grabbed, missing)
# Export original files
##########################################################################
def exportOriginalFiles(deck):
deck.startProgress()
origDir = deck.mediaDir(create=True)
newDir = origDir.replace(".media", ".originals")
try:
os.mkdir(newDir)
except (IOError, OSError):
pass
cnt = 0
for row in deck.s.all("select filename, originalPath from media"):
(fname, path) = row
base = os.path.basename(path)
if base == fname:
continue
cnt += 1
deck.updateProgress(label="Exporting %s" % base)
old = os.path.join(origDir, fname)
new = os.path.join(newDir, base)
if os.path.exists(new):
new = re.sub("(.*)(\..*?)$", "\\1-%s\\2" %
os.path.splitext(fname)[0], new)
shutil.copy2(old, new)
deck.finishProgress()
return cnt

View file

@ -132,7 +132,7 @@ class CardModel(object):
mapper(CardModel, cardModelsTable) mapper(CardModel, cardModelsTable)
def formatQA(cid, mid, fact, tags, cm): def formatQA(cid, mid, fact, tags, cm, deck):
"Return a dict of {id, question, answer}" "Return a dict of {id, question, answer}"
d = {'id': cid} d = {'id': cid}
fields = {} fields = {}
@ -154,9 +154,9 @@ def formatQA(cid, mid, fact, tags, cm):
# convert old style # convert old style
format = re.sub("%\((.+?)\)s", "{{\\1}}", format) format = re.sub("%\((.+?)\)s", "{{\\1}}", format)
# allow custom rendering functions & info # allow custom rendering functions & info
fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm) fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm, deck)
html = render(format, fields) html = render(format, fields)
d[type] = runFilter("formatQA", html, type, cid, mid, fact, tags, cm) d[type] = runFilter("formatQA", html, type, cid, mid, fact, tags, cm, deck)
return d return d
# Model table # Model table

View file

@ -35,7 +35,7 @@ from anki.stats import Stats, globalStats
from anki.history import CardHistoryEntry from anki.history import CardHistoryEntry
from anki.stats import globalStats from anki.stats import globalStats
from anki.utils import ids2str, hexifyID, checksum from anki.utils import ids2str, hexifyID, checksum
from anki.media import mediaRefs from anki.media import mediaFiles
from anki.lang import _ from anki.lang import _
from hooks import runHook from hooks import runHook
@ -1204,7 +1204,7 @@ select %(c)s from cards where
%(c)s like '%%<img %%' %(c)s like '%%<img %%'
or %(c)s like '%%[sound:%%'""" % {'c': col}) or %(c)s like '%%[sound:%%'""" % {'c': col})
for entry in txt: for entry in txt:
for (full, fname, repl) in mediaRefs(entry): for fname in mediaFiles(entry):
used[fname] = True used[fname] = True
# copy only used media # copy only used media
for file in files: for file in files:

106
tests/test_media.py Normal file
View file

@ -0,0 +1,106 @@
# coding: utf-8
import tempfile, os, time
import anki.media as m
from anki import DeckStorage
from anki.stdmodels import BasicModel
from anki.utils import checksum
# uniqueness check
def test_unique():
    # uniquePath() is expected to hand back the requested name while it is
    # free, and otherwise insert " (N)" before the extension, counting up.
    def write(path, data):
        # Close explicitly so the file is on disk before the next check,
        # rather than relying on the file object being collected promptly.
        fh = open(path, "w")
        try:
            fh.write(data)
        finally:
            fh.close()
    tmpdir = tempfile.mkdtemp(prefix="anki")
    # new file
    n = "foo.jpg"
    new = os.path.basename(m.uniquePath(tmpdir, n))
    assert new == n
    # duplicate file
    write(os.path.join(tmpdir, n), "hello")
    n = "foo.jpg"
    new = os.path.basename(m.uniquePath(tmpdir, n))
    assert new == "foo (1).jpg"
    # another duplicate
    write(os.path.join(tmpdir, "foo (1).jpg"), "hello")
    n = "foo.jpg"
    new = os.path.basename(m.uniquePath(tmpdir, n))
    assert new == "foo (2).jpg"
# copying files to media folder
def test_copy():
    # copyToMedia() should return the stored name for a new file, and reuse
    # an existing entry's name when the incoming file has the same checksum.
    def write(path, data):
        # Close explicitly: the content must be flushed to disk before
        # copyToMedia() reads the file.
        fh = open(path, "w")
        try:
            fh.write(data)
        finally:
            fh.close()
    deck = DeckStorage.Deck()
    tmpdir = tempfile.mkdtemp(prefix="anki")
    path = os.path.join(tmpdir, "foo.jpg")
    write(path, "hello")
    # new file
    assert m.copyToMedia(deck, path) == "foo.jpg"
    # dupe md5: register foo.jpg with the checksum of "hello", then offer a
    # differently named file with identical content
    deck.s.statement("""
insert into media values (null, 'foo.jpg', 0, 0, :sum, '')""",
                     sum=checksum("hello"))
    path = os.path.join(tmpdir, "bar.jpg")
    write(path, "hello")
    assert m.copyToMedia(deck, path) == "foo.jpg"
# media db
def test_db():
    # Follow the media table through a fact's lifecycle. As used by the
    # assertions below, the "size" column holds the number of references to
    # a file and "originalPath" holds its checksum (empty when the file is
    # missing).
    def write(path, data):
        # Close explicitly so rebuildMediaDir() sees the written content
        # instead of depending on when the file object is collected.
        fh = open(path, "w")
        try:
            fh.write(data)
        finally:
            fh.close()
    deck = DeckStorage.Deck()
    deck.addModel(BasicModel())
    tmpdir = tempfile.mkdtemp(prefix="anki")
    path = os.path.join(tmpdir, "foo.jpg")
    write(path, "hello")
    # add a new fact that references it twice
    f = deck.newFact()
    f['Front'] = u"<img src='foo.jpg'>"
    f['Back'] = u"back [sound:foo.jpg]"
    deck.addFact(f)
    # 1 entry in the media db, with two references, and missing file
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select size from media") == 2
    assert deck.s.scalar("select not originalPath from media")
    # copy to media folder & check db
    # (path is rebound to whatever copyToMedia() returns)
    path = m.copyToMedia(deck, path)
    m.rebuildMediaDir(deck)
    # md5 should be set now
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select size from media") == 2
    assert deck.s.scalar("select originalPath from media")
    # edit the fact to remove a reference
    f['Back'] = u""
    f.setModified(True, deck)
    deck.s.flush()
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select size from media") == 1
    # remove the front reference too
    f['Front'] = u""
    f.setModified(True, deck)
    assert deck.s.scalar("select size from media") == 0
    # add the reference back
    f['Front'] = u"<img src='foo.jpg'>"
    f.setModified(True, deck)
    assert deck.s.scalar("select size from media") == 1
    # detect file modifications
    oldsum = deck.s.scalar("select originalPath from media")
    write(path, "world")
    m.rebuildMediaDir(deck)
    newsum = deck.s.scalar("select originalPath from media")
    assert newsum and newsum != oldsum
    # delete underlying file and check db
    os.unlink(path)
    m.rebuildMediaDir(deck)
    # md5 should be gone again
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select not originalPath from media")
    # media db should pick up media defined via templates & bulk update
    f['Back'] = u"bar.jpg"
    f.setModified(True, deck)
    deck.s.flush()
    # modify template & regenerate
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select sum(size) from media") == 1
    deck.currentModel.cardModels[0].aformat=u'<img src="{{{Back}}}">'
    deck.updateCardsFromModel(deck.currentModel)
    assert deck.s.scalar("select sum(size) from media") == 2
    assert deck.s.scalar("select count() from media") == 2
    # switching back to a plain-text template drops the reference but
    # keeps the row
    deck.currentModel.cardModels[0].aformat=u'{{{Back}}}'
    deck.updateCardsFromModel(deck.currentModel)
    assert deck.s.scalar("select count() from media") == 2
    assert deck.s.scalar("select sum(size) from media") == 1

View file

@ -254,8 +254,8 @@ def test_localsync_media():
os.unlink(os.path.join(deck1media, "22161b29b0c18e068038021f54eee1ee.png")) os.unlink(os.path.join(deck1media, "22161b29b0c18e068038021f54eee1ee.png"))
rebuildMediaDir(deck1) rebuildMediaDir(deck1)
client.sync() client.sync()
assert deck1.s.scalar("select count(1) from media") == 2 assert deck1.s.scalar("select count(1) from media") == 3
assert deck2.s.scalar("select count(1) from media") == 2 assert deck2.s.scalar("select count(1) from media") == 3
# One way syncing # One way syncing
########################################################################## ##########################################################################