mirror of https://github.com/ankitects/anki.git
rewrite media support
- media is no longer hashed, and is instead stored in the DB under its original name
- when adding media, its checksum is calculated and used to look for duplicates
- duplicate filenames result in a number being tacked onto the file
- the size column is used to count card references to media. If media is referenced in a fact but not in the question or answer, the count will be zero.
- there is no guarantee media will be listed in the media DB if it is unused on the question & answer
- if rebuildMediaDir(delete=True), entries with zero references are deleted, along with any unused files in the media dir
- rebuildMediaDir() updates the internal checksums, and sets the checksum to "" if a file can't be found
- rebuildMediaDir() is a lot less destructive now, and will leave alone any directories it finds in the media folder (but won't look inside them either)
- rebuildMediaDir() returns more information about the state of media now
- the online and mobile clients will need to make sure that when downloading media, entries with no checksum are non-fatal and do not abort the download process
- the ref count is updated every time the q/a is updated, so the DB should be up to date after every add/edit/import
- since we now look for media on the q/a, card templates like '<img src="{{{field}}}">' will work
- the export of original files is gone, as it is no longer needed
- moved from the per-model media URL to a deckVar. downloadMissingMedia() uses this now. Deck subscriptions will have to be updated to share media another way.
- the deck is now passed into formatQA, as latex support is going to change
This commit is contained in:
parent c4c9847028
commit 0c9672e7b8
9 changed files with 358 additions and 270 deletions
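The reference counting described above can be summarised with a small sketch (illustrative only: it mirrors what Card.rebuildQA() does in the diff below, and the helper name refreshMediaRefs is made up for this example):

from anki.media import mediaFiles, updateMediaCount

def refreshMediaRefs(deck, card, newQuestion, newAnswer):
    # count down every file referenced by the old question/answer
    delta = {}
    for txt in (card.question, card.answer):
        for f in mediaFiles(txt or ""):
            delta[f] = delta.get(f, 0) - 1
    card.question = newQuestion
    card.answer = newAnswer
    # count up every file referenced by the new question/answer
    for txt in (card.question, card.answer):
        for f in mediaFiles(txt):
            delta[f] = delta.get(f, 0) + 1
    # the net change per file is applied to media.size, which now
    # doubles as the reference count
    for (fname, cnt) in delta.items():
        if cnt:
            updateMediaCount(deck, fname, cnt)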
@@ -29,7 +29,7 @@ Edit the card:
fields = card.fact.model.fieldModels
for field in fields:
    card.fact[field.name] = "newvalue"
card.fact.setModified(textChanged=True)
card.fact.setModified(textChanged=True, deck=deck)
deck.setModified()

Get all cards via ORM (slow):
@@ -13,6 +13,7 @@ from anki.db import *
from anki.models import CardModel, Model, FieldModel, formatQA
from anki.facts import Fact, factsTable, Field
from anki.utils import parseTags, findTag, stripHTML, genID, hexifyID
from anki.media import updateMediaCount, mediaFiles

# Cards
##########################################################################
@@ -92,12 +93,37 @@ class Card(object):
        # for non-orm use
        self.cardModelId = cardModel.id
        self.ordinal = cardModel.ordinal
        d = {}
        for f in self.fact.model.fieldModels:
            d[f.name] = (f.id, self.fact[f.name])
        qa = formatQA(None, fact.modelId, d, self.splitTags(), cardModel)
        self.question = qa['question']
        self.answer = qa['answer']

    def rebuildQA(self, deck, media=True):
        # format qa
        d = {}
        for f in self.fact.model.fieldModels:
            d[f.name] = (f.id, self.fact[f.name])
        qa = formatQA(None, self.fact.modelId, d, self.splitTags(),
                      self.cardModel, deck)
        # find old media references
        files = {}
        for type in ("question", "answer"):
            for f in mediaFiles(getattr(self, type) or ""):
                if f in files:
                    files[f] -= 1
                else:
                    files[f] = -1
        # update q/a
        self.question = qa['question']
        self.answer = qa['answer']
        # determine media delta
        for type in ("question", "answer"):
            for f in mediaFiles(getattr(self, type)):
                if f in files:
                    files[f] += 1
                else:
                    files[f] = 1
        # update media counts if we're attached to deck
        if media:
            for (f, cnt) in files.items():
                updateMediaCount(deck, f, cnt)
        self.setModified()

    def setModified(self):
        self.modified = time.time()
anki/deck.py (74)
@@ -26,8 +26,10 @@ from operator import itemgetter
from itertools import groupby
from anki.hooks import runHook, hookEmpty
from anki.template import render
from anki.media import updateMediaCount, mediaFiles, \
     rebuildMediaDir

# ensure all the metadata in other files is loaded before proceeding
# ensure all the DB metadata in other files is loaded before proceeding
import anki.models, anki.facts, anki.cards, anki.stats
import anki.history, anki.media
@@ -69,7 +71,7 @@ SEARCH_FIELD = 6
SEARCH_FIELD_EXISTS = 7
SEARCH_QA = 8
SEARCH_PHRASE_WB = 9
DECK_VERSION = 58
DECK_VERSION = 60

deckVarsTable = Table(
    'deckVars', metadata,
@@ -161,7 +163,7 @@ class Deck(object):
        self.lastSessionStart = 0
        self.queueLimit = 200
        # if most recent deck var not defined, make sure defaults are set
        if not self.s.scalar("select 1 from deckVars where key = 'newSpacing'"):
        if not self.s.scalar("select 1 from deckVars where key = 'mediaURL'"):
            self.setVarDefault("suspendLeeches", True)
            self.setVarDefault("leechFails", 16)
            self.setVarDefault("perDay", True)
@@ -170,6 +172,7 @@ class Deck(object):
            self.setVarDefault("newInactive", self.suspended)
            self.setVarDefault("revInactive", self.suspended)
            self.setVarDefault("newSpacing", 60)
            self.setVarDefault("mediaURL", "")
        self.updateCutoff()
        self.setupStandardScheduler()
@ -719,6 +722,7 @@ limit %s""" % (self.cramOrder, self.queueLimit)))
|
|||
card = anki.cards.Card()
|
||||
if not card.fromDB(self.s, id):
|
||||
return
|
||||
card.deck = self
|
||||
card.genFuzz()
|
||||
card.startTimer()
|
||||
return card
|
||||
|
@ -898,7 +902,7 @@ and type between 1 and 2""",
|
|||
tags = scard.fact.tags
|
||||
tags = addTags("Leech", tags)
|
||||
scard.fact.tags = canonifyTags(tags)
|
||||
scard.fact.setModified(textChanged=True)
|
||||
scard.fact.setModified(textChanged=True, deck=self)
|
||||
self.updateFactTags([scard.fact.id])
|
||||
self.s.flush()
|
||||
self.s.expunge(scard)
|
||||
|
@ -1425,7 +1429,6 @@ and due < :now""", now=time.time())
|
|||
cards = []
|
||||
self.s.save(fact)
|
||||
# update field cache
|
||||
fact.setModified(True)
|
||||
self.factCount += 1
|
||||
self.flushMod()
|
||||
isRandom = self.newCardOrder == NEW_CARDS_RANDOM
|
||||
|
@ -1440,6 +1443,8 @@ and due < :now""", now=time.time())
|
|||
card.combinedDue = due
|
||||
self.flushMod()
|
||||
cards.append(card)
|
||||
# update card q/a
|
||||
fact.setModified(True, self)
|
||||
self.updateFactTags([fact.id])
|
||||
# this will call reset() which will update counts
|
||||
self.updatePriorities([c.id for c in cards])
|
||||
|
@ -1496,13 +1501,17 @@ where factId = :fid and cardModelId = :cmid""",
|
|||
fid=fact.id, cmid=cardModel.id) == 0:
|
||||
# enough for 10 card models assuming 0.00001 timer precision
|
||||
card = anki.cards.Card(
|
||||
fact, cardModel, created=fact.created+0.0001*cardModel.ordinal)
|
||||
fact, cardModel,
|
||||
fact.created+0.0001*cardModel.ordinal)
|
||||
self.updateCardTags([card.id])
|
||||
self.updatePriority(card)
|
||||
self.cardCount += 1
|
||||
self.newCount += 1
|
||||
ids.append(card.id)
|
||||
self.setModified()
|
||||
|
||||
if ids:
|
||||
fact.setModified(textChanged=True, deck=self)
|
||||
self.setModified()
|
||||
return ids
|
||||
|
||||
def factIsInvalid(self, fact):
|
||||
|
@ -1565,6 +1574,7 @@ where facts.id not in (select distinct factId from cards)""")
|
|||
for cardModel in cms:
|
||||
card = anki.cards.Card(fact, cardModel)
|
||||
cards.append(card)
|
||||
fact.setModified(textChanged=True, deck=self, media=False)
|
||||
return cards
|
||||
|
||||
def cloneFact(self, oldFact):
|
||||
|
@ -1951,10 +1961,10 @@ and c.id in %s""" % ids2str(ids))
|
|||
else:
|
||||
mod = ""
|
||||
# tags
|
||||
cids = ids2str([x[0] for x in ids])
|
||||
tags = dict([(x[0], x[1:]) for x in
|
||||
self.splitTagsList(
|
||||
where="and cards.id in %s" %
|
||||
ids2str([x[0] for x in ids]))])
|
||||
where="and cards.id in %s" % cids)])
|
||||
facts = {}
|
||||
# fields
|
||||
for k, g in groupby(self.s.all("""
|
||||
|
@ -1968,9 +1978,33 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
|
|||
cms = {}
|
||||
for c in self.s.query(CardModel).all():
|
||||
cms[c.id] = c
|
||||
pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid])
|
||||
pend = [formatQA(cid, mid, facts[fid], tags[cid], cms[cmid], self)
|
||||
for (cid, cmid, fid, mid) in ids]
|
||||
if pend:
|
||||
# find existing media references
|
||||
files = {}
|
||||
for txt in self.s.column0(
|
||||
"select question || answer from cards where id in %s" %
|
||||
cids):
|
||||
for f in mediaFiles(txt):
|
||||
if f in files:
|
||||
files[f] -= 1
|
||||
else:
|
||||
files[f] = -1
|
||||
# determine ref count delta
|
||||
for p in pend:
|
||||
for type in ("question", "answer"):
|
||||
txt = p[type]
|
||||
for f in mediaFiles(txt):
|
||||
if f in files:
|
||||
files[f] += 1
|
||||
else:
|
||||
files[f] = 1
|
||||
# update references - this could be more efficient
|
||||
for (f, cnt) in files.items():
|
||||
if not cnt:
|
||||
continue
|
||||
updateMediaCount(self, f, cnt)
|
||||
# update q/a
|
||||
self.s.execute("""
|
||||
update cards set
|
||||
|
@ -1979,7 +2013,8 @@ order by fields.factId""" % ids2str([x[2] for x in ids])),
|
|||
where id = :id""" % mod, pend)
|
||||
# update fields cache
|
||||
self.updateFieldCache(facts.keys())
|
||||
self.flushMod()
|
||||
if dirty:
|
||||
self.flushMod()
|
||||
|
||||
def updateFieldCache(self, fids):
|
||||
"Add stripped HTML cache for sorting/searching."
|
||||
|
@ -3018,7 +3053,7 @@ where key = :key""", key=key, value=value):
|
|||
if not self.tmpMediaDir and create:
|
||||
self.tmpMediaDir = tempfile.mkdtemp(prefix="anki")
|
||||
dir = self.tmpMediaDir
|
||||
if not os.path.exists(dir):
|
||||
if not dir or not os.path.exists(dir):
|
||||
return None
|
||||
# change to the current dir
|
||||
os.chdir(dir)
|
||||
|
@ -3090,6 +3125,7 @@ Return new path, relative to media dir."""
|
|||
self.s = None
|
||||
|
||||
def setModified(self, newTime=None):
|
||||
#import traceback; traceback.print_stack()
|
||||
self.modified = newTime or time.time()
|
||||
|
||||
def flushMod(self):
|
||||
|
@ -3878,6 +3914,7 @@ order by priority desc, due desc""")
|
|||
# we're opening a shared deck with no indices - we'll need
|
||||
# them if we want to rebuild the queue
|
||||
DeckStorage._addIndices(deck)
|
||||
oldmod = deck.modified
|
||||
else:
|
||||
prog = False
|
||||
deck.path = path
|
||||
|
@ -3996,10 +4033,6 @@ select filename, size, created, originalPath, description from media""")
|
|||
deck.s.statements("""
|
||||
insert into media values (
|
||||
:id, :filename, :size, :created, :originalPath, :description)""", h)
|
||||
# rerun check
|
||||
anki.media.rebuildMediaDir(deck, dirty=False)
|
||||
# no need to track deleted media yet
|
||||
deck.s.execute("delete from mediaDeleted")
|
||||
deck.version = 9
|
||||
if deck.version < 10:
|
||||
deck.s.statement("""
|
||||
|
@ -4211,7 +4244,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
|
|||
deck.failedCardMax = 0
|
||||
deck.version = 37
|
||||
deck.s.commit()
|
||||
# skip 38
|
||||
if deck.version < 39:
|
||||
deck.reset()
|
||||
# manually suspend all suspended cards
|
||||
|
@ -4232,7 +4264,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
|
|||
deck.s.statement("update models set features = ''")
|
||||
deck.version = 40
|
||||
deck.s.commit()
|
||||
# skip 41
|
||||
if deck.version < 42:
|
||||
deck.version = 42
|
||||
deck.s.commit()
|
||||
|
@ -4270,7 +4301,6 @@ nextFactor, reps, thinkingTime, yesCount, noCount from reviewHistory""")
|
|||
DeckStorage._addIndices(deck)
|
||||
deck.version = 50
|
||||
deck.s.commit()
|
||||
# skip 51
|
||||
if deck.version < 52:
|
||||
dname = deck.name()
|
||||
sname = deck.syncName
|
||||
|
@@ -4329,6 +4359,11 @@ update cards set due = created, combinedDue = created
where relativeDelay = 2""")
            deck.version = 58
            deck.s.commit()
        if deck.version < 60:
            # rebuild the media db based on new format
            rebuildMediaDir(deck, dirty=False)
            deck.version = 60
            deck.s.commit()
        # executing a pragma here is very slow on large decks, so we store
        # our own record
        if not deck.getInt("pageSize") == 4096:
@ -4339,6 +4374,7 @@ where relativeDelay = 2""")
|
|||
deck.setVar("pageSize", 4096, mod=False)
|
||||
deck.s.commit()
|
||||
if prog:
|
||||
assert deck.modified == oldmod
|
||||
deck.finishProgress()
|
||||
return deck
|
||||
_upgradeDeck = staticmethod(_upgradeDeck)
|
||||
|
|
|
@@ -129,19 +129,15 @@ class Fact(object):
    def focusLost(self, field):
        runHook('fact.focusLost', self, field)

    def setModified(self, textChanged=False):
    def setModified(self, textChanged=False, deck=None, media=True):
        "Mark modified and update cards."
        self.modified = time.time()
        if textChanged:
            d = {}
            for f in self.model.fieldModels:
                d[f.name] = (f.id, self[f.name])
            self.spaceUntil = stripHTMLMedia(u" ".join([x[1] for x in d.values()]))
            assert deck
            self.spaceUntil = stripHTMLMedia(u" ".join(
                self.values()))
            for card in self.cards:
                qa = formatQA(None, self.modelId, d, card.splitTags(), card.cardModel)
                card.question = qa['question']
                card.answer = qa['answer']
                card.setModified()
                card.rebuildQA(deck)

# Fact deletions
##########################################################################
anki/media.py (380)
@@ -8,16 +8,13 @@ Media support
"""
__docformat__ = 'restructuredtext'

import os, stat, time, shutil, re, sys, urllib2
import os, shutil, re, urllib2, time
from anki.db import *
from anki.facts import Fact
from anki.utils import addTags, genID, ids2str, checksum
from anki.utils import checksum, genID
from anki.lang import _

regexps = (("(\[sound:([^]]+)\])",
            "[sound:%s]"),
           ("(<img src=[\"']?([^\"'>]+)[\"']? ?/?>)",
            "<img src=\"%s\">"))
regexps = ("(\[sound:([^]]+)\])",
           "(<img src=[\"']?([^\"'>]+)[\"']? ?/?>)")

# Tables
##########################################################################
@@ -26,9 +23,15 @@ mediaTable = Table(
    'media', metadata,
    Column('id', Integer, primary_key=True, nullable=False),
    Column('filename', UnicodeText, nullable=False),
    # reused as reference count
    Column('size', Integer, nullable=False),
    # treated as modification date, not creation date
    Column('created', Float, nullable=False),
    # reused as md5sum. empty string if file doesn't exist on disk
    Column('originalPath', UnicodeText, nullable=False, default=u""),
    # older versions stored original filename here, so we'll leave it for now
    # in case we add a feature to rename media back to its original name. in
    # the future we may want to zero this to save space
    Column('description', UnicodeText, nullable=False, default=u""))

class Media(object):
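Since the old columns are reused rather than renamed, lookups against the media table read a little unusually under the new scheme. Two illustrative queries (not part of the diff, but matching what rebuildMediaDir() and removeUnusedMedia() below issue):

# media.size now doubles as the q/a reference count, so unreferenced media is:
unused = deck.s.column0("select filename from media where size = 0")
# media.originalPath now holds the md5 checksum; an empty string means the
# file could not be found on disk:
missing = deck.s.column0("select filename from media where originalPath = ''")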
@@ -42,258 +45,179 @@ mediaDeletedTable = Table(
    nullable=False),
    Column('deletedTime', Float, nullable=False))

# Helper functions
# File handling
##########################################################################

def mediaFilename(path):
    "Return checksum.ext for path"
    new = checksum(open(path, "rb").read())
    ext = os.path.splitext(path)[1].lower()
    return "%s%s" % (new, ext)

def copyToMedia(deck, path):
    """Copy PATH to MEDIADIR, and return new filename.
    Update media table. If file already exists, don't copy."""
    origPath = path
    description = os.path.splitext(os.path.basename(path))[0]
    newBase = mediaFilename(path)
    new = os.path.join(deck.mediaDir(create=True), newBase)
    # copy if not existing
    if not os.path.exists(new):
        if new.lower() == path.lower():
            # case insensitive filesystems suck
            os.rename(path, new)

    If a file with the same md5sum exists in the DB, return that.
    If a file with the same name exists, return a unique name.
    This does not modify the media table."""
    # see if have duplicate contents
    newpath = deck.s.scalar(
        "select filename from media where originalPath = :cs",
        cs=checksum(open(path, "rb").read()))
    # check if this filename already exists
    if not newpath:
        base = os.path.basename(path)
        mdir = deck.mediaDir(create=True)
        newpath = uniquePath(mdir, base)
        shutil.copy2(path, newpath)
    return os.path.basename(newpath)

def uniquePath(dir, base):
    # remove any dangerous characters
    base = re.sub(r"[][<>:/\\]", "", base)
    # find a unique name
    (root, ext) = os.path.splitext(base)
    def repl(match):
        n = int(match.group(1))
        return " (%d)" % (n+1)
    while True:
        path = os.path.join(dir, root + ext)
        if not os.path.exists(path):
            break
        reg = " \((\d+)\)$"
        if not re.search(reg, root):
            root = root + " (1)"
        else:
            shutil.copy2(path, new)
            newSize = os.stat(new)[stat.ST_SIZE]
            if not deck.s.scalar(
                "select 1 from media where filename = :f",
                f=newBase):
                # if the user has modified a hashed file, try to remember the old
                # filename
                old = deck.s.scalar(
                    "select originalPath from media where filename = :s",
                    s=os.path.basename(origPath))
                if old:
                    origPath = old
                    description = os.path.splitext(os.path.basename(origPath))[0]
            root = re.sub(reg, repl, root)
    return path

# DB routines
##########################################################################

def updateMediaCount(deck, file, count=1):
    mdir = deck.mediaDir()
    if deck.s.scalar(
        "select 1 from media where filename = :file", file=file):
        deck.s.statement(
            "update media set size = size + :c, created = :t where filename = :file",
            file=file, c=count, t=time.time())
    elif count > 0:
        try:
            path = unicode(path, sys.getfilesystemencoding())
        except TypeError:
            pass
            sum = unicode(
                checksum(open(os.path.join(mdir, file), "rb").read()))
        except:
            sum = u""
        deck.s.statement("""
insert into media (id, filename, size, created, originalPath,
description)
values (:id, :filename, :size, :created, :originalPath,
:description)""",
                         id=genID(),
                         filename=newBase,
                         size=newSize,
                         created=time.time(),
                         originalPath=origPath,
                         description=description)
    deck.flushMod()
    return newBase
insert into media (id, filename, size, created, originalPath, description)
values (:id, :file, :c, :mod, :sum, '')""",
                         id=genID(), file=file, c=count, mod=time.time(),
                         sum=sum)

def _modifyFields(deck, fieldsToUpdate, modifiedFacts, dirty):
    factIds = ids2str(modifiedFacts.keys())
    if fieldsToUpdate:
        deck.s.execute("update fields set value = :val where id = :id",
                       fieldsToUpdate)
    deck.s.statement(
        "update facts set modified = :time where id in %s" %
        factIds, time=time.time())
    ids = deck.s.all("""select cards.id, cards.cardModelId, facts.id,
facts.modelId from cards, facts where
cards.factId = facts.id and facts.id in %s"""
                     % factIds)
    deck.updateCardQACache(ids, dirty)
    deck.flushMod()

def removeUnusedMedia(deck):
    ids = deck.s.column0("select id from media where size = 0")
    for id in ids:
        deck.s.statement("insert into mediaDeleted values (:id, :t)",
                         id=id, t=time.time())
    deck.s.statement("delete from media where size = 0")

# String manipulation
##########################################################################

def mediaRefs(string):
    "Return list of (fullMatch, filename, replacementString)."
def mediaFiles(string):
    l = []
    for (reg, repl) in regexps:
    for reg in regexps:
        for (full, fname) in re.findall(reg, string):
            l.append((full, fname, repl))
            l.append(fname)
    return l

def stripMedia(txt):
    for (reg, x) in regexps:
    for reg in regexps:
        txt = re.sub(reg, "", txt)
    return txt
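As a quick illustration (not part of the diff), mediaFiles() just extracts the referenced filenames from rendered question/answer HTML, which is all the new reference counting needs, while stripMedia() drops the references entirely:

from anki.media import mediaFiles, stripMedia

html = u'Hello <img src="cat.jpg"> [sound:meow.mp3]'
print mediaFiles(html)  # the two referenced filenames, e.g. meow.mp3 and cat.jpg
print stripMedia(html)  # the same string with both references removed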
# Rebuilding DB
##########################################################################

def rebuildMediaDir(deck, deleteRefs=False, dirty=True):
    "Delete references to missing files, delete unused files."
    localFiles = {}
    modifiedFacts = {}
    unmodifiedFacts = {}
    renamedFiles = {}
    existingFiles = {}
    factsMissingMedia = {}
    updateFields = []
    usedFiles = {}
    unusedFileCount = 0
    missingFileCount = 0
    deck.mediaDir(create=True)
    deck.startProgress(16, 0, _("Check Media DB"))
    # rename all files to checksum versions, note non-renamed ones
    deck.updateProgress(_("Checksum files..."))
    files = os.listdir(unicode(deck.mediaDir()))
    mod = len(files) / 10
    for c, oldBase in enumerate(files):
        if mod and not c % mod:
            deck.updateProgress()
        if oldBase.startswith("latex-"):
            continue
        oldPath = os.path.join(deck.mediaDir(), oldBase)
        if oldBase.startswith("."):
            continue
        if os.path.isdir(oldPath):
            continue
        newBase = copyToMedia(deck, oldPath)
        if oldBase.lower() == newBase.lower():
            existingFiles[oldBase] = 1
        else:
            renamedFiles[oldBase] = newBase
    deck.updateProgress(value=10)
    # now look through all fields, and update references to files
    deck.updateProgress(_("Scan fields..."))
    for (id, fid, val) in deck.s.all(
        "select id, factId, value from fields"):
        oldval = val
        for (full, fname, repl) in mediaRefs(val):
            if fname in renamedFiles:
                # renamed
                newBase = renamedFiles[fname]
                val = re.sub(re.escape(full), repl % newBase, val)
                usedFiles[newBase] = 1
            elif fname in existingFiles:
                # used & current
                usedFiles[fname] = 1
            else:
                # missing
                missingFileCount += 1
                if deleteRefs:
                    val = re.sub(re.escape(full), "", val)

def rebuildMediaDir(deck, delete=False, dirty=True):
    deck.startProgress(title=_("Check Media DB"))
    mdir = deck.mediaDir(create=True)
    # set all ref counts to 0
    deck.s.statement("update media set size = 0")
    # look through cards for media references
    refs = {}
    for (question, answer) in deck.s.all(
        "select question, answer from cards"):
        for txt in (question, answer):
            for f in mediaFiles(txt):
                if f in refs:
                    refs[f] += 1
                else:
                    factsMissingMedia[fid] = 1
        if val != oldval:
            updateFields.append({'id': id, 'val': val})
            modifiedFacts[fid] = 1
        else:
            if fid not in factsMissingMedia:
                unmodifiedFacts[fid] = 1
    # update modified fields
    deck.updateProgress(_("Modify fields..."))
    if modifiedFacts:
        _modifyFields(deck, updateFields, modifiedFacts, dirty)
    # fix tags
    deck.updateProgress(_("Update tags..."))
    if dirty:
        deck.deleteTags(unmodifiedFacts.keys(), _("MediaMissing"))
        if deleteRefs:
            deck.deleteTags(modifiedFacts.keys(), _("MediaMissing"))
        else:
            deck.addTags(factsMissingMedia.keys(), _("MediaMissing"))
    # build cache of db records
    deck.updateProgress(_("Delete unused files..."))
    mediaIds = dict(deck.s.all("select filename, id from media"))
    # look through the media dir for any unused files, and delete
    for f in os.listdir(unicode(deck.mediaDir())):
        if f.startswith("."):
            refs[f] = 1
    # update ref counts
    for (file, count) in refs.items():
        updateMediaCount(deck, file, count)
    # find unused media
    unused = []
    for file in os.listdir(mdir):
        path = os.path.join(mdir, file)
        if not os.path.isfile(path):
            # ignore directories
            continue
        if f.startswith("latex-"):
            continue
        path = os.path.join(deck.mediaDir(), f)
        if os.path.isdir(path):
            shutil.rmtree(path)
            continue
        if f in usedFiles:
            try:
                del mediaIds[f]
            except:
                pass # case errors
        else:
            if file not in refs:
                unused.append(file)
    # optionally delete
    if delete:
        removeUnusedMedia(deck)
        for f in unused:
            path = os.path.join(mdir, f)
            os.unlink(path)
            unusedFileCount += 1
    deck.updateProgress(_("Delete stale references..."))
    for (fname, id) in mediaIds.items():
        # maybe delete from db
        if id:
            deck.s.statement("delete from media where id = :id", id=id)
            deck.s.statement("""
insert into mediaDeleted (mediaId, deletedTime)
values (:id, strftime('%s', 'now'))""", id=id)
    # update deck and save
    deck.flushMod()
    deck.save()
    # check md5s are up to date
    update = []
    for (file, created, md5) in deck.s.all(
        "select filename, created, originalPath from media"):
        path = os.path.join(mdir, file)
        if not os.path.exists(path):
            if md5:
                update.append({'f':file, 'sum':u"", 'c':time.time()})
        else:
            sum = unicode(
                checksum(open(os.path.join(mdir, file), "rb").read()))
            if md5 != sum:
                update.append({'f':file, 'sum':sum, 'c':time.time()})
    if update:
        deck.s.statements("""
update media set originalPath = :sum, created = :c where filename = :f""",
                          update)
    # update deck and get return info
    if dirty:
        deck.flushMod()
    have = deck.s.scalar("select count() from media where originalPath != ''")
    nohave = deck.s.column0("select filename from media where originalPath = ''")
    deck.finishProgress()
    return missingFileCount, unusedFileCount - len(renamedFiles)
    return (have, nohave, unused)

# Download missing
##########################################################################

def downloadMissing(deck):
    from anki.latex import renderLatex
    urls = dict(
        deck.s.all("select id, features from models where features != ''"))
    if not urls:
    urlbase = deck.getVar("mediaURL")
    if not urlbase:
        return None
    mdir = deck.mediaDir(create=True)
    deck.startProgress()
    missing = {}
    for (id, fid, val, mid) in deck.s.all("""
select fields.id, factId, value, modelId from fields, facts
where facts.id = fields.factId"""):
        # add latex tags
        val = renderLatex(deck, val, False)
        for (full, fname, repl) in mediaRefs(val):
            if not os.path.exists(os.path.join(mdir, fname)) and mid in urls:
                missing[fname] = mid
    for c, file in enumerate(missing.keys()):
        deck.updateProgress(label=_("Downloading %(a)d of %(b)d...") % {
            'a': c,
            'b': len(missing),
            })
        try:
            path = urls[missing[file]] + file
            url = urllib2.urlopen(path)
            open(file, "wb").write(url.read())
        except:
            deck.finishProgress()
            return (False, path)
    missing = 0
    grabbed = 0
    for c, (f, sum) in enumerate(deck.s.all(
        "select filename, not not originalPath from media")):
        path = os.path.join(mdir, f)
        if not os.path.exists(path):
            try:
                rpath = urlbase + f
                url = urllib2.urlopen(rpath)
                open(f, "wb").write(url.read())
                grabbed += 1
            except:
                if sum:
                    # the file is supposed to exist
                    deck.finishProgress()
                    return (False, rpath)
                else:
                    # ignore and keep going
                    missing += 1
        deck.updateProgress(label=_("File %d...") % (grabbed+missing))
    deck.finishProgress()
    return (True, len(missing))
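A sketch of how a client might drive the new deckVar-based download path (illustrative only; the 'mediaURL' deckVar and the return shapes are taken from the code above, and deck is assumed to be an open deck):

import anki.media

# the shared URL replaces the old per-model media URL
deck.setVar("mediaURL", "http://example.com/shared-media/")

ret = anki.media.downloadMissing(deck)
if ret is None:
    print "no mediaURL set for this deck"
elif not ret[0]:
    print "download aborted at", ret[1]
else:
    # files that are missing upstream but have no checksum are skipped
    # rather than aborting the whole download
    print "fetched %d files; %d missing files skipped" % (ret[1], ret[2])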
# Export original files
##########################################################################

def exportOriginalFiles(deck):
    deck.startProgress()
    origDir = deck.mediaDir(create=True)
    newDir = origDir.replace(".media", ".originals")
    try:
        os.mkdir(newDir)
    except (IOError, OSError):
        pass
    cnt = 0
    for row in deck.s.all("select filename, originalPath from media"):
        (fname, path) = row
        base = os.path.basename(path)
        if base == fname:
            continue
        cnt += 1
        deck.updateProgress(label="Exporting %s" % base)
        old = os.path.join(origDir, fname)
        new = os.path.join(newDir, base)
        if os.path.exists(new):
            new = re.sub("(.*)(\..*?)$", "\\1-%s\\2" %
                         os.path.splitext(fname)[0], new)
        shutil.copy2(old, new)
    deck.finishProgress()
    return cnt
    return (True, grabbed, missing)
@@ -132,7 +132,7 @@ class CardModel(object):
mapper(CardModel, cardModelsTable)

def formatQA(cid, mid, fact, tags, cm):
def formatQA(cid, mid, fact, tags, cm, deck):
    "Return a dict of {id, question, answer}"
    d = {'id': cid}
    fields = {}

@@ -154,9 +154,9 @@ def formatQA(cid, mid, fact, tags, cm):
        # convert old style
        format = re.sub("%\((.+?)\)s", "{{\\1}}", format)
        # allow custom rendering functions & info
        fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm)
        fields = runFilter("prepareFields", fields, cid, mid, fact, tags, cm, deck)
        html = render(format, fields)
        d[type] = runFilter("formatQA", html, type, cid, mid, fact, tags, cm)
        d[type] = runFilter("formatQA", html, type, cid, mid, fact, tags, cm, deck)
    return d

# Model table
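For plugin authors, the practical effect of the formatQA() change is that the 'prepareFields' and 'formatQA' filters now receive the deck as a trailing argument. A minimal sketch (assuming the usual addHook registration from anki.hooks; the extra 'DeckName' field is just an example):

from anki.hooks import addHook

def prepareFields(fields, cid, mid, fact, tags, cm, deck):
    # filters must return the (possibly modified) first argument;
    # here we expose an extra substitution based on the deck
    fields['DeckName'] = deck.name()
    return fields

addHook("prepareFields", prepareFields)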
@@ -35,7 +35,7 @@ from anki.stats import Stats, globalStats
from anki.history import CardHistoryEntry
from anki.stats import globalStats
from anki.utils import ids2str, hexifyID, checksum
from anki.media import mediaRefs
from anki.media import mediaFiles
from anki.lang import _
from hooks import runHook

@@ -1204,7 +1204,7 @@ select %(c)s from cards where
%(c)s like '%%<img %%'
or %(c)s like '%%[sound:%%'""" % {'c': col})
        for entry in txt:
            for (full, fname, repl) in mediaRefs(entry):
            for fname in mediaFiles(entry):
                used[fname] = True
        # copy only used media
        for file in files:
tests/test_media.py (106, new file)
@@ -0,0 +1,106 @@
# coding: utf-8

import tempfile, os, time
import anki.media as m
from anki import DeckStorage
from anki.stdmodels import BasicModel
from anki.utils import checksum

# uniqueness check
def test_unique():
    dir = tempfile.mkdtemp(prefix="anki")
    # new file
    n = "foo.jpg"
    new = os.path.basename(m.uniquePath(dir, n))
    assert new == n
    # duplicate file
    open(os.path.join(dir, n), "w").write("hello")
    n = "foo.jpg"
    new = os.path.basename(m.uniquePath(dir, n))
    assert new == "foo (1).jpg"
    # another duplicate
    open(os.path.join(dir, "foo (1).jpg"), "w").write("hello")
    n = "foo.jpg"
    new = os.path.basename(m.uniquePath(dir, n))
    assert new == "foo (2).jpg"

# copying files to media folder
def test_copy():
    deck = DeckStorage.Deck()
    dir = tempfile.mkdtemp(prefix="anki")
    path = os.path.join(dir, "foo.jpg")
    open(path, "w").write("hello")
    # new file
    assert m.copyToMedia(deck, path) == "foo.jpg"
    # dupe md5
    deck.s.statement("""
insert into media values (null, 'foo.jpg', 0, 0, :sum, '')""",
                     sum=checksum("hello"))
    path = os.path.join(dir, "bar.jpg")
    open(path, "w").write("hello")
    assert m.copyToMedia(deck, path) == "foo.jpg"

# media db
def test_db():
    deck = DeckStorage.Deck()
    deck.addModel(BasicModel())
    dir = tempfile.mkdtemp(prefix="anki")
    path = os.path.join(dir, "foo.jpg")
    open(path, "w").write("hello")
    # add a new fact that references it twice
    f = deck.newFact()
    f['Front'] = u"<img src='foo.jpg'>"
    f['Back'] = u"back [sound:foo.jpg]"
    deck.addFact(f)
    # 1 entry in the media db, with two references, and missing file
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select size from media") == 2
    assert deck.s.scalar("select not originalPath from media")
    # copy to media folder & check db
    path = m.copyToMedia(deck, path)
    m.rebuildMediaDir(deck)
    # md5 should be set now
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select size from media") == 2
    assert deck.s.scalar("select originalPath from media")
    # edit the fact to remove a reference
    f['Back'] = u""
    f.setModified(True, deck)
    deck.s.flush()
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select size from media") == 1
    # remove the front reference too
    f['Front'] = u""
    f.setModified(True, deck)
    assert deck.s.scalar("select size from media") == 0
    # add the reference back
    f['Front'] = u"<img src='foo.jpg'>"
    f.setModified(True, deck)
    assert deck.s.scalar("select size from media") == 1
    # detect file modifications
    oldsum = deck.s.scalar("select originalPath from media")
    open(path, "w").write("world")
    m.rebuildMediaDir(deck)
    newsum = deck.s.scalar("select originalPath from media")
    assert newsum and newsum != oldsum
    # delete underlying file and check db
    os.unlink(path)
    m.rebuildMediaDir(deck)
    # md5 should be gone again
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select not originalPath from media")
    # media db should pick up media defined via templates & bulk update
    f['Back'] = u"bar.jpg"
    f.setModified(True, deck)
    deck.s.flush()
    # modify template & regenerate
    assert deck.s.scalar("select count() from media") == 1
    assert deck.s.scalar("select sum(size) from media") == 1
    deck.currentModel.cardModels[0].aformat = u'<img src="{{{Back}}}">'
    deck.updateCardsFromModel(deck.currentModel)
    assert deck.s.scalar("select sum(size) from media") == 2
    assert deck.s.scalar("select count() from media") == 2
    deck.currentModel.cardModels[0].aformat = u'{{{Back}}}'
    deck.updateCardsFromModel(deck.currentModel)
    assert deck.s.scalar("select count() from media") == 2
    assert deck.s.scalar("select sum(size) from media") == 1
@@ -254,8 +254,8 @@ def test_localsync_media():
    os.unlink(os.path.join(deck1media, "22161b29b0c18e068038021f54eee1ee.png"))
    rebuildMediaDir(deck1)
    client.sync()
    assert deck1.s.scalar("select count(1) from media") == 2
    assert deck2.s.scalar("select count(1) from media") == 2
    assert deck1.s.scalar("select count(1) from media") == 3
    assert deck2.s.scalar("select count(1) from media") == 3

# One way syncing
##########################################################################