rewrite media support

- media is no longer hashed, and instead stored in the db using its original
  name
- when adding media, its checksum is calculated and used to look for
  duplicates
- duplicate filenames will result in a number tacked on the file
- the size column is used to count card references to media. If media is
  referenced in a fact but not the question or answer, the count will be zero.
- there is no guarantee media will be listed in the media db if it is unused
  on the question & answer
- if rebuildMediaDir(delete=True) is called, entries with zero references are
  deleted, along with any unused files in the media dir.
- rebuildMediaDir() will update the internal checksums, and set the checksum
  to "" if a file can't be found
- rebuildMediaDir() is a lot less destructive now, and will leave alone
  directories it finds in the media folder (but not look in them either)
- rebuildMediaDir() returns more information about the state of media now
- the online and mobile clients will need to make sure that when
  downloading media, entries with no checksum are non-fatal and should not
  abort the download process.
- the ref count is updated every time the q/a is updated - so the db should be
  up to date after every add/edit/import
- since we look for media on the q/a now, card templates like '<img
  src="{{{field}}}">' will work now
- the "export original files" feature is gone, as it is not needed anymore
- move from per-model media URL to deckVar. downloadMissingMedia() uses this
  now. Deck subscriptions will have to be updated to share media another way.
- pass deck in formatQA, as latex support is going to change
This commit is contained in:
Damien Elmes 2010-12-11 00:51:48 +09:00
parent c4c9847028
commit 0c9672e7b8
9 changed files with 358 additions and 270 deletions

View file

@ -29,7 +29,7 @@ Edit the card:
fields = card.fact.model.fieldModels
for field in fields:
card.fact[field.name] = "newvalue"
card.fact.setModified(textChanged=True)
card.fact.setModified(textChanged=True, deck=deck)
deck.setModified()
Get all cards via ORM (slow):

View file

@ -13,6 +13,7 @@ from anki.db import *
from anki.models import CardModel, Model, FieldModel, formatQA
from anki.facts import Fact, factsTable, Field
from anki.utils import parseTags, findTag, stripHTML, genID, hexifyID
from anki.media import updateMediaCount, mediaFiles
# Cards
##########################################################################
@ -92,12 +93,37 @@ class Card(object):
# for non-orm use
self.cardModelId = cardModel.id
self.ordinal = cardModel.ordinal
def rebuildQA(self, deck, media=True):
# format qa
d = {}
for f in self.fact.model.fieldModels:
d[f.name] = (f.id, self.fact[f.name])
qa = formatQA(None, fact.modelId, d, self.splitTags(), cardModel)
qa = formatQA(None, self.fact.modelId, d, self.splitTags(),
self.cardModel, deck)
# find old media references
files = {}
for type in ("question", "answer"):
for f in mediaFiles(getattr(self, type) or ""):
if f in files:
files[f] -= 1
else:
files[f] = -1
# update q/a
self.question = qa['question']
self.answer = qa['answer']
# determine media delta
for type in ("question", "answer"):
for f in mediaFiles(getattr(self, type)):
if f in files:
files[f] += 1
else:
files[f] = 1
# update media counts if we're attached to deck
if media:
for (f, cnt) in files.items():
updateMediaCount(deck, f, cnt)
self.setModified()
def setModified(self):
self.modified = time.time()

View file

@ -26,8 +26,10 @@ from operator import itemgetter
from itertools import groupby
from anki.hooks import runHook, hookEmpty
from anki.template import render
from anki.media import updateMediaCount, mediaFiles, \
rebuildMediaDir
# ensure all the metadata in other files is loaded before proceeding
# ensure all the DB metadata in other files is loaded before proceeding
import anki.models, anki.facts, anki.cards, anki.stats
import anki.history, anki.media
@ -69,7 +71,7 @@ SEARCH_FIELD = 6
SEARCH_FIELD_EXISTS = 7
SEARCH_QA = 8
SEARCH_PHRASE_WB = 9
DECK_VERSION = 58
DECK_VERSION = 60
deckVarsTable = Table(
'deckVars', metadata,
@ -161,7 +163,7 @@ class Deck(object):
self.lastSessionStart = 0
self.queueLimit = 200
# if most recent deck var not defined, make sure defaults are set
if not self.s.scalar("select 1 from deckVars where key = 'newSpacing'"):
if not self.s.scalar("select 1 from deckVars where key = 'mediaURL'"):
self.setVarDefault("suspendLeeches", True)
self.setVarDefault("leechFails", 16)
self.setVarDefault("perDay", True)
@ -170,6 +172,7 @@ class Deck(object):
self.setVarDefault("newInactive", self.suspended)
self.setVarDefault("revInactive", self.suspended)
self.setVarDefault("newSpacing", 60)
self.setVarDefault("mediaURL", "")
self.updateCutoff()
self.setupStandardScheduler()
@ -719,6 +722,7 @@ limit %s""" % (self.cramOrder, self.queueLimit)))
card = anki.cards.Card()
if not card.fromDB(self.s, id):
return
card.deck = self
card.genFuzz()
card.startTimer()
return card
@ -898,7 +902,7 @@ and type between 1 and 2""",
tags = scard.fact.tags
tags = addTags("Leech", tags)
scard.fact.tags = canonifyTags(tags)
scard.fact.setModified(textChanged=True)
scard.fact.setModified(textChanged=True, deck=self)
self.updateFactTags([scard.fact.id])
self.s.flush()
self.s.expunge(scard)
@ -1425,7 +1429,6 @@ and due < :now""", now=time.time())
cards = []
self.s.save(fact)
# update field cache
fact.setModified(True)
self.factCount += 1
self.flushMod()
isRandom = self.newCardOrder == NEW_CARDS_RANDOM
@ -1440,6 +1443,8 @@ and due < :now""", now=time.time())
card.combinedDue = due
self.flushMod()
cards.append(card)
# update card q/a
fact.setModified(True, self)
self.updateFactTags([fact.id])
# this will call reset() which will update counts
self.updatePriorities([c.id for c in cards])
@ -1496,12 +1501,16 @@ where factId = :fid and cardModelId = :cmid""",
fid=fact.id, cmid=cardModel.id) == 0:
# enough for 10 card models assuming 0.00001 timer precision
card = anki.cards.Card(
fact, cardModel, created=fact.created+0.0001*cardModel.ordinal)
fact, cardModel,
fact.created+0.0001*cardModel.ordinal)
self.updateCardTags([card.id])
self.updatePriority(card)
self.cardCount += 1
self.newCount += 1
ids.append(card.id)
if ids:
fact.setModified(textChanged=True, deck=self)
self.setModified()
return ids
@ -1565,6 +1574,7 @@ where facts.id not in (select distinct factId from cards)""")
for cardModel in cms:
card = anki.cards.Card(fact, cardModel)
cards.append(card)
fact.setModified(textChanged=True, deck=self, media=False)
return cards
def cloneFact(self, oldFact):
@ -1951,10 +1961,10 @@ and c.id in %s""" % ids2str(ids))
else:
mod = ""
# tags
cids = ids2str([x[0] for x in ids])
tags = dict([(x[0], x[1:]) for x in
self.splitTagsList(
where="and cards.id in %s" %
ids2str([x[0] for x in ids]))])
where="and cards.id in %s" % cids)])
facts = {}
# fields
for k, g in groupby(self.s.all("""
@ -1968,9 +1978,33 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
cms = {}
for c in self.s.query(CardModel).all():
cms[c.id] = c
pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid])
pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid], self)
for (cid, cmid, fid, mid) in ids]
if pend:
# find existing media references
files = {}
for txt in self.s.column0(
"select question || answer from cards where id in %s" %
cids):
for f in mediaFiles(txt):
if f in files:
files[f] -= 1
else:
files[f] = -1
# determine ref count delta
for p in pend:
for type in ("question", "answer"):
txt = p[type]
for f in mediaFiles(txt):
if f in files:
files[f] += 1
else:
files[f] = 1
# update references - this could be more efficient
for (f, cnt) in files.items():
if not cnt:
continue
updateMediaCount(self, f, cnt)
# update q/a
self.s.execute("""
update cards set
@ -1979,6 +2013,7 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
where id = :id""" % mod, pend)
# update fields cache
self.updateFieldCache(facts.keys())
if dirty:
self.flushMod()
def updateFieldCache(self, fids):
@ -3018,7 +3053,7 @@ where key = :key""", key=key, value=value):
if not self.tmpMediaDir and create:
self.tmpMediaDir = tempfile.mkdtemp(prefix="anki")
dir = self.tmpMediaDir
if not os.path.exists(dir):
if not dir or not os.path.exists(dir):
return None
# change to the current dir
os.chdir(dir)
@ -3090,6 +3125,7 @@ Return new path, relative to media dir."""
self.s = None
def setModified(self, newTime=None):
#import traceback; traceback.print_stack()
self.modified = newTime or time.time()
def flushMod(self):
@ -3878,6 +3914,7 @@ order by priority desc, due desc""")
# we're opening a shared deck with no indices - we'll need
# them if we want to rebuild the queue
DeckStorage._addIndices(deck)
oldmod = deck.modified
else:
prog = False
deck.path = path
@ -3996,10 +4033,6 @@ select filename, size, created, originalPath, description from media""")
deck.s.statements("""
insert into media values (
:id, :filename, :size, :created, :originalPath, :description)""", h)
# rerun check
anki.media.rebuildMediaDir(deck, dirty=False)
# no need to track deleted media yet
deck.s.execute("delete from mediaDeleted")
deck.version = 9
if deck.version < 10:
deck.s.statement("""
@ -4211,7 +4244,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
deck.failedCardMax = 0
deck.version = 37
deck.s.commit()
# skip 38
if deck.version < 39:
deck.reset()
# manually suspend all suspended cards
@ -4232,7 +4264,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
deck.s.statement("update models set features = ''")
deck.version = 40
deck.s.commit()
# skip 41
if deck.version < 42:
deck.version = 42
deck.s.commit()
@ -4270,7 +4301,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
DeckStorage._addIndices(deck)
deck.version = 50
deck.s.commit()
# skip 51
if deck.version < 52:
dname = deck.name()
sname = deck.syncName
@ -4329,6 +4359,11 @@ update cards set due = created, combinedDue = created
where relativeDelay = 2""")
deck.version = 58
deck.s.commit()
if deck.version < 60:
# rebuild the media db based on new format
rebuildMediaDir(deck, dirty=False)
deck.version = 60
deck.s.commit()
# executing a pragma here is very slow on large decks, so we store
# our own record
if not deck.getInt("pageSize") == 4096:
@ -4339,6 +4374,7 @@ where relativeDelay = 2""")
deck.setVar("pageSize", 4096, mod=False)
deck.s.commit()
if prog:
assert deck.modified == oldmod
deck.finishProgress()
return deck
_upgradeDeck = staticmethod(_upgradeDeck)

View file

@ -129,19 +129,15 @@ class Fact(object):
def focusLost(self, field):
runHook('fact.focusLost', self, field)
def setModified(self, textChanged=False):
def setModified(self, textChanged=False, deck=None, media=True):
"Mark modified and update cards."
self.modified = time.time()
if textChanged:
d = {}
for f in self.model.fieldModels:
d[f.name] = (f.id, self[f.name])
self.spaceUntil = stripHTMLMedia(u" ".join([x[1] for x in d.values()]))
assert deck
self.spaceUntil = stripHTMLMedia(u" ".join(
self.values()))
for card in self.cards:
qa = formatQA(None, self.modelId, d, card.splitTags(), card.cardModel)
card.question = qa['question']
card.answer = qa['answer']
card.setModified()
card.rebuildQA(deck)
# Fact deletions
##########################################################################

View file

@ -8,16 +8,13 @@ Media support
"""
__docformat__ = 'restructuredtext'
import os, stat, time, shutil, re, sys, urllib2
import os, shutil, re, urllib2, time
from anki.db import *
from anki.facts import Fact
from anki.utils import addTags, genID, ids2str, checksum
from anki.utils import checksum, genID
from anki.lang import _
regexps = (("(\[sound:([^]]+)\])",
"[sound:%s]"),
("(<img src=[\"']?([^\"'>]+)[\"']? ?/?>)",
"<img src=\"%s\">"))
regexps = ("(\[sound:([^]]+)\])",
"(<img src=[\"']?([^\"'>]+)[\"']? ?/?>)")
# Tables
##########################################################################
@ -26,9 +23,15 @@ mediaTable = Table(
'media', metadata,
Column('id', Integer, primary_key=True, nullable=False),
Column('filename', UnicodeText, nullable=False),
# reused as reference count
Column('size', Integer, nullable=False),
# treated as modification date, not creation date
Column('created', Float, nullable=False),
# reused as md5sum. empty string if file doesn't exist on disk
Column('originalPath', UnicodeText, nullable=False, default=u""),
# older versions stored original filename here, so we'll leave it for now
# in case we add a feature to rename media back to its original name. in
# the future we may want to zero this to save space
Column('description', UnicodeText, nullable=False, default=u""))
class Media(object):
@ -42,258 +45,179 @@ mediaDeletedTable = Table(
nullable=False),
Column('deletedTime', Float, nullable=False))
# Helper functions
# File handling
##########################################################################
def mediaFilename(path):
"Return checksum.ext for path"
new = checksum(open(path, "rb").read())
ext = os.path.splitext(path)[1].lower()
return "%s%s" % (new, ext)
def copyToMedia(deck, path):
"""Copy PATH to MEDIADIR, and return new filename.
Update media table. If file already exists, don't copy."""
origPath = path
description = os.path.splitext(os.path.basename(path))[0]
newBase = mediaFilename(path)
new = os.path.join(deck.mediaDir(create=True), newBase)
# copy if not existing
if not os.path.exists(new):
if new.lower() == path.lower():
# case insensitive filesystems suck
os.rename(path, new)
If a file with the same md5sum exists in the DB, return that.
If a file with the same name exists, return a unique name.
This does not modify the media table."""
# see if have duplicate contents
newpath = deck.s.scalar(
"select filename from media where originalPath = :cs",
cs=checksum(open(path, "rb").read()))
# check if this filename already exists
if not newpath:
base = os.path.basename(path)
mdir = deck.mediaDir(create=True)
newpath = uniquePath(mdir, base)
shutil.copy2(path, newpath)
return os.path.basename(newpath)
def uniquePath(dir, base):
# remove any dangerous characters
base = re.sub(r"[][<>:/\\]", "", base)
# find a unique name
(root, ext) = os.path.splitext(base)
def repl(match):
n = int(match.group(1))
return " (%d)" % (n+1)
while True:
path = os.path.join(dir, root + ext)
if not os.path.exists(path):
break
reg = " \((\d+)\)$"
if not re.search(reg, root):
root = root + " (1)"
else:
shutil.copy2(path, new)
newSize = os.stat(new)[stat.ST_SIZE]
if not deck.s.scalar(
"select 1 from media where filename = :f",
f=newBase):
# if the user has modified a hashed file, try to remember the old
# filename
old = deck.s.scalar(
"select originalPath from media where filename = :s",
s=os.path.basename(origPath))
if old:
origPath = old
description = os.path.splitext(os.path.basename(origPath))[0]
try:
path = unicode(path, sys.getfilesystemencoding())
except TypeError:
pass
deck.s.statement("""
insert into media (id, filename, size, created, originalPath,
description)
values (:id, :filename, :size, :created, :originalPath,
:description)""",
id=genID(),
filename=newBase,
size=newSize,
created=time.time(),
originalPath=origPath,
description=description)
deck.flushMod()
return newBase
root = re.sub(reg, repl, root)
return path
def _modifyFields(deck, fieldsToUpdate, modifiedFacts, dirty):
factIds = ids2str(modifiedFacts.keys())
if fieldsToUpdate:
deck.s.execute("update fields set value = :val where id = :id",
fieldsToUpdate)
# DB routines
##########################################################################
def updateMediaCount(deck, file, count=1):
mdir = deck.mediaDir()
if deck.s.scalar(
"select 1 from media where filename = :file", file=file):
deck.s.statement(
"update facts set modified = :time where id in %s" %
factIds, time=time.time())
ids = deck.s.all("""select cards.id, cards.cardModelId, facts.id,
facts.modelId from cards, facts where
cards.factId = facts.id and facts.id in %s"""
% factIds)
deck.updateCardQACache(ids, dirty)
deck.flushMod()
"update media set size = size + :c, created = :t where filename = :file",
file=file, c=count, t=time.time())
elif count > 0:
try:
sum = unicode(
checksum(open(os.path.join(mdir, file), "rb").read()))
except:
sum = u""
deck.s.statement("""
insert into media (id, filename, size, created, originalPath, description)
values (:id, :file, :c, :mod, :sum, '')""",
id=genID(), file=file, c=count, mod=time.time(),
sum=sum)
def removeUnusedMedia(deck):
    "Log each zero-reference media row in mediaDeleted, then delete those rows."
    logDeletion = "insert into mediaDeleted values (:id, :t)"
    dropUnused = "delete from media where size = 0"
    # the size column is reused as the reference count, so size = 0 means
    # no card question/answer refers to the file any more
    unusedIds = deck.s.column0("select id from media where size = 0")
    for mediaId in unusedIds:
        # one log row per removed entry, stamped with the current time
        deck.s.statement(logDeletion, id=mediaId, t=time.time())
    deck.s.statement(dropUnused)
def mediaRefs(string):
"Return list of (fullMatch, filename, replacementString)."
# String manipulation
##########################################################################
def mediaFiles(string):
l = []
for (reg, repl) in regexps:
for reg in regexps:
for (full, fname) in re.findall(reg, string):
l.append((full, fname, repl))
l.append(fname)
return l
def stripMedia(txt):
for (reg, x) in regexps:
for reg in regexps:
txt = re.sub(reg, "", txt)
return txt
# Rebuilding DB
##########################################################################
def rebuildMediaDir(deck, deleteRefs=False, dirty=True):
"Delete references to missing files, delete unused files."
localFiles = {}
modifiedFacts = {}
unmodifiedFacts = {}
renamedFiles = {}
existingFiles = {}
factsMissingMedia = {}
updateFields = []
usedFiles = {}
unusedFileCount = 0
missingFileCount = 0
deck.mediaDir(create=True)
deck.startProgress(16, 0, _("Check Media DB"))
# rename all files to checksum versions, note non-renamed ones
deck.updateProgress(_("Checksum files..."))
files = os.listdir(unicode(deck.mediaDir()))
mod = len(files) / 10
for c, oldBase in enumerate(files):
if mod and not c % mod:
deck.updateProgress()
if oldBase.startswith("latex-"):
continue
oldPath = os.path.join(deck.mediaDir(), oldBase)
if oldBase.startswith("."):
continue
if os.path.isdir(oldPath):
continue
newBase = copyToMedia(deck, oldPath)
if oldBase.lower() == newBase.lower():
existingFiles[oldBase] = 1
def rebuildMediaDir(deck, delete=False, dirty=True):
deck.startProgress(title=_("Check Media DB"))
mdir = deck.mediaDir(create=True)
# set all ref counts to 0
deck.s.statement("update media set size = 0")
# look through cards for media references
refs = {}
for (question, answer) in deck.s.all(
"select question, answer from cards"):
for txt in (question, answer):
for f in mediaFiles(txt):
if f in refs:
refs[f] += 1
else:
renamedFiles[oldBase] = newBase
deck.updateProgress(value=10)
# now look through all fields, and update references to files
deck.updateProgress(_("Scan fields..."))
for (id, fid, val) in deck.s.all(
"select id, factId, value from fields"):
oldval = val
for (full, fname, repl) in mediaRefs(val):
if fname in renamedFiles:
# renamed
newBase = renamedFiles[fname]
val = re.sub(re.escape(full), repl % newBase, val)
usedFiles[newBase] = 1
elif fname in existingFiles:
# used & current
usedFiles[fname] = 1
else:
# missing
missingFileCount += 1
if deleteRefs:
val = re.sub(re.escape(full), "", val)
else:
factsMissingMedia[fid] = 1
if val != oldval:
updateFields.append({'id': id, 'val': val})
modifiedFacts[fid] = 1
else:
if fid not in factsMissingMedia:
unmodifiedFacts[fid] = 1
# update modified fields
deck.updateProgress(_("Modify fields..."))
if modifiedFacts:
_modifyFields(deck, updateFields, modifiedFacts, dirty)
# fix tags
deck.updateProgress(_("Update tags..."))
if dirty:
deck.deleteTags(unmodifiedFacts.keys(), _("MediaMissing"))
if deleteRefs:
deck.deleteTags(modifiedFacts.keys(), _("MediaMissing"))
else:
deck.addTags(factsMissingMedia.keys(), _("MediaMissing"))
# build cache of db records
deck.updateProgress(_("Delete unused files..."))
mediaIds = dict(deck.s.all("select filename, id from media"))
# look through the media dir for any unused files, and delete
for f in os.listdir(unicode(deck.mediaDir())):
if f.startswith("."):
refs[f] = 1
# update ref counts
for (file, count) in refs.items():
updateMediaCount(deck, file, count)
# find unused media
unused = []
for file in os.listdir(mdir):
path = os.path.join(mdir, file)
if not os.path.isfile(path):
# ignore directories
continue
if f.startswith("latex-"):
continue
path = os.path.join(deck.mediaDir(), f)
if os.path.isdir(path):
shutil.rmtree(path)
continue
if f in usedFiles:
try:
del mediaIds[f]
except:
pass # case errors
else:
if file not in refs:
unused.append(file)
# optionally delete
if delete:
removeUnusedMedia(deck)
for f in unused:
path = os.path.join(mdir, f)
os.unlink(path)
unusedFileCount += 1
deck.updateProgress(_("Delete stale references..."))
for (fname, id) in mediaIds.items():
# maybe delete from db
if id:
deck.s.statement("delete from media where id = :id", id=id)
deck.s.statement("""
insert into mediaDeleted (mediaId, deletedTime)
values (:id, strftime('%s', 'now'))""", id=id)
# update deck and save
# check md5s are up to date
update = []
for (file, created, md5) in deck.s.all(
"select filename, created, originalPath from media"):
path = os.path.join(mdir, file)
if not os.path.exists(path):
if md5:
update.append({'f':file, 'sum':u"", 'c':time.time()})
else:
sum = unicode(
checksum(open(os.path.join(mdir, file), "rb").read()))
if md5 != sum:
update.append({'f':file, 'sum':sum, 'c':time.time()})
if update:
deck.s.statements("""
update media set originalPath = :sum, created = :c where filename = :f""",
update)
# update deck and get return info
if dirty:
deck.flushMod()
deck.save()
have = deck.s.scalar("select count() from media where originalPath != ''")
nohave = deck.s.column0("select filename from media where originalPath = ''")
deck.finishProgress()
return missingFileCount, unusedFileCount - len(renamedFiles)
return (have, nohave, unused)
# Download missing
##########################################################################
def downloadMissing(deck):
from anki.latex import renderLatex
urls = dict(
deck.s.all("select id, features from models where features != ''"))
if not urls:
urlbase = deck.getVar("mediaURL")
if not urlbase:
return None
mdir = deck.mediaDir(create=True)
deck.startProgress()
missing = {}
for (id, fid, val, mid) in deck.s.all("""
select fields.id, factId, value, modelId from fields, facts
where facts.id = fields.factId"""):
# add latex tags
val = renderLatex(deck, val, False)
for (full, fname, repl) in mediaRefs(val):
if not os.path.exists(os.path.join(mdir, fname)) and mid in urls:
missing[fname] = mid
for c, file in enumerate(missing.keys()):
deck.updateProgress(label=_("Downloading %(a)d of %(b)d...") % {
'a': c,
'b': len(missing),
})
missing = 0
grabbed = 0
for c, (f, sum) in enumerate(deck.s.all(
"select filename, not not originalPath from media")):
path = os.path.join(mdir, f)
if not os.path.exists(path):
try:
path = urls[missing[file]] + file
url = urllib2.urlopen(path)
open(file, "wb").write(url.read())
rpath = urlbase + f
url = urllib2.urlopen(rpath)
open(f, "wb").write(url.read())
grabbed += 1
except:
if sum:
# the file is supposed to exist
deck.finishProgress()
return (False, path)
return (False, rpath)
else:
# ignore and keep going
missing += 1
deck.updateProgress(label=_("File %d...") % (grabbed+missing))
deck.finishProgress()
return (True, len(missing))
# Export original files
##########################################################################
def exportOriginalFiles(deck):
deck.startProgress()
origDir = deck.mediaDir(create=True)
newDir = origDir.replace(".media", ".originals")
try:
os.mkdir(newDir)
except (IOError, OSError):
pass
cnt = 0
for row in deck.s.all("select filename, originalPath from media"):
(fname, path) = row
base = os.path.basename(path)
if base == fname:
continue
cnt += 1
deck.updateProgress(label="Exporting %s" % base)
old = os.path.join(origDir, fname)
new = os.path.join(newDir, base)
if os.path.exists(new):
new = re.sub("(.*)(\..*?)$", "\\1-%s\\2" %
os.path.splitext(fname)[0], new)
shutil.copy2(old, new)
deck.finishProgress()
return cnt
return (True, grabbed, missing)

View file

@ -132,7 +132,7 @@ class CardModel(object):
mapper(CardModel, cardModelsTable)
def formatQA(cid, mid, fact, tags, cm):
def formatQA(cid, mid, fact, tags, cm, deck):
"Return a dict of {id, question, answer}"
d = {'id': cid}
fields = {}
@ -154,9 +154,9 @@ def formatQA(cid, mid, fact, tags, cm):
# convert old style
format = re.sub("%\((.+?)\)s", "{{\\1}}", format)
# allow custom rendering functions & info
fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm)
fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm, deck)
html = render(format, fields)
d[type] = runFilter("formatQA", html, type, cid, mid, fact, tags, cm)
d[type] = runFilter("formatQA", html, type, cid, mid, fact, tags, cm, deck)
return d
# Model table

View file

@ -35,7 +35,7 @@ from anki.stats import Stats, globalStats
from anki.history import CardHistoryEntry
from anki.stats import globalStats
from anki.utils import ids2str, hexifyID, checksum
from anki.media import mediaRefs
from anki.media import mediaFiles
from anki.lang import _
from hooks import runHook
@ -1204,7 +1204,7 @@ select %(c)s from cards where
%(c)s like '%%<img %%'
or %(c)s like '%%[sound:%%'""" % {'c': col})
for entry in txt:
for (full, fname, repl) in mediaRefs(entry):
for fname in mediaFiles(entry):
used[fname] = True
# copy only used media
for file in files:

106
tests/test_media.py Normal file
View file

@ -0,0 +1,106 @@
# coding: utf-8
import tempfile, os, time
import anki.media as m
from anki import DeckStorage
from anki.stdmodels import BasicModel
from anki.utils import checksum
# uniqueness check
def test_unique():
    "uniquePath() keeps a free name and numbers a taken one: (1), then (2)."
    mediaDir = tempfile.mkdtemp(prefix="anki")
    name = "foo.jpg"

    def suggested():
        # basename that uniquePath() proposes for `name` inside the temp dir
        return os.path.basename(m.uniquePath(mediaDir, name))

    def occupy(fname):
        # create the file so that fname counts as already taken
        open(os.path.join(mediaDir, fname), "w").write("hello")

    # new file: nothing on disk yet, so the name comes back unchanged
    assert suggested() == name
    # duplicate file: foo.jpg exists, so the first numbered variant is offered
    occupy(name)
    assert suggested() == "foo (1).jpg"
    # another duplicate: with "foo (1).jpg" taken too, the counter moves on
    occupy("foo (1).jpg")
    assert suggested() == "foo (2).jpg"
# copying files to media folder
def test_copy():
    "copyToMedia() keeps a new file's name and reuses the name of same-md5 media."
    deck = DeckStorage.Deck()
    srcDir = tempfile.mkdtemp(prefix="anki")
    content = "hello"

    def makeSource(fname):
        # write the sample content to fname in the scratch dir; return its path
        target = os.path.join(srcDir, fname)
        open(target, "w").write(content)
        return target

    # new file: copied under its own name
    first = makeSource("foo.jpg")
    assert m.copyToMedia(deck, first) == "foo.jpg"
    # dupe md5: register a media row whose originalPath column (reused as the
    # md5sum) matches the content, since copyToMedia() does not touch the table
    deck.s.statement("""
insert into media values (null, 'foo.jpg', 0, 0, :sum, '')""",
                     sum=checksum(content))
    # a differently named file with identical content resolves to foo.jpg
    second = makeSource("bar.jpg")
    assert m.copyToMedia(deck, second) == "foo.jpg"
# media db
def test_db():
    "Track media row count, ref counts and md5s through edits and rebuilds."
    deck = DeckStorage.Deck()
    deck.addModel(BasicModel())
    srcDir = tempfile.mkdtemp(prefix="anki")
    path = os.path.join(srcDir, "foo.jpg")
    open(path, "w").write("hello")

    def scalar(sql):
        # deck.s is looked up on every call rather than cached
        return deck.s.scalar(sql)

    def entries():
        return scalar("select count() from media")

    def refs():
        # the size column is reused as the reference count
        return scalar("select size from media")

    def totalRefs():
        return scalar("select sum(size) from media")

    def storedSum():
        # the originalPath column is reused as the md5sum
        return scalar("select originalPath from media")

    def sumMissing():
        return scalar("select not originalPath from media")

    # add a new fact that references it twice
    f = deck.newFact()
    f['Front'] = u"<img src='foo.jpg'>"
    f['Back'] = u"back [sound:foo.jpg]"
    deck.addFact(f)

    def edit(field, value):
        # change one field and regenerate the fact's cards
        f[field] = value
        f.setModified(True, deck)

    def useAnswerFormat(fmt):
        # swap the first card model's answer template and bulk-regenerate
        deck.currentModel.cardModels[0].aformat = fmt
        deck.updateCardsFromModel(deck.currentModel)

    # 1 entry in the media db, with two references, and missing file
    assert entries() == 1
    assert refs() == 2
    assert sumMissing()
    # copy to media folder & check db
    # NOTE(review): path becomes a bare filename from here on; the later
    # open()/unlink() presumably rely on the cwd being the media dir - confirm
    path = m.copyToMedia(deck, path)
    m.rebuildMediaDir(deck)
    # md5 should be set now
    assert entries() == 1
    assert refs() == 2
    assert storedSum()
    # edit the fact to remove a reference
    edit('Back', u"")
    deck.s.flush()
    assert entries() == 1
    assert refs() == 1
    # remove the front reference too
    edit('Front', u"")
    assert refs() == 0
    # add the reference back
    edit('Front', u"<img src='foo.jpg'>")
    assert refs() == 1
    # detect file modifications
    oldsum = storedSum()
    open(path, "w").write("world")
    m.rebuildMediaDir(deck)
    newsum = storedSum()
    assert newsum and newsum != oldsum
    # delete underlying file and check db
    os.unlink(path)
    m.rebuildMediaDir(deck)
    # md5 should be gone again
    assert entries() == 1
    assert sumMissing()
    # media db should pick up media defined via templates & bulk update
    edit('Back', u"bar.jpg")
    deck.s.flush()
    # modify template & regenerate
    assert entries() == 1
    assert totalRefs() == 1
    useAnswerFormat(u'<img src="{{{Back}}}">')
    assert totalRefs() == 2
    assert entries() == 2
    useAnswerFormat(u'{{{Back}}}')
    assert entries() == 2
    assert totalRefs() == 1

View file

@ -254,8 +254,8 @@ def test_localsync_media():
os.unlink(os.path.join(deck1media, "22161b29b0c18e068038021f54eee1ee.png"))
rebuildMediaDir(deck1)
client.sync()
assert deck1.s.scalar("select count(1) from media") == 2
assert deck2.s.scalar("select count(1) from media") == 2
assert deck1.s.scalar("select count(1) from media") == 3
assert deck2.s.scalar("select count(1) from media") == 3
# One way syncing
##########################################################################