refactor media

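- rename media table column created -> modified
- rename media table column size -> refcnt
- rename media table column originalPath -> chksum
- remove the media table's description column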
This commit is contained in:
Damien Elmes 2011-02-22 05:54:28 +09:00
parent edd8f79ab9
commit 2d1d946504
4 changed files with 50 additions and 54 deletions

View file

@ -122,7 +122,7 @@ select models.id, models.modified from models, facts where
facts.modelId = models.id and facts.modelId = models.id and
facts.id in %s""" % ids2str([f[0] for f in facts])) facts.id in %s""" % ids2str([f[0] for f in facts]))
media = self.deck.db.all(""" media = self.deck.db.all("""
select id, created from media""") select id, modified from media""")
return { return {
# cards # cards
"cards": cards, "cards": cards,

View file

@ -17,22 +17,10 @@ regexps = ("(?i)(\[sound:([^]]+)\])",
mediaTable = Table( mediaTable = Table(
'media', metadata, 'media', metadata,
Column('id', Integer, primary_key=True, nullable=False), Column('id', Integer, primary_key=True, nullable=False),
Column('filename', UnicodeText, nullable=False), Column('filename', UnicodeText, nullable=False, unique=True),
# reused as reference count Column('refcnt', Integer, nullable=False),
Column('size', Integer, nullable=False), Column('modified', Float, nullable=False),
# treated as modification date, not creation date Column('chksum', UnicodeText, nullable=False, default=u""))
Column('created', Float, nullable=False),
# reused as md5sum. empty string if file doesn't exist on disk
Column('originalPath', UnicodeText, nullable=False, default=u""),
# older versions stored original filename here, so we'll leave it for now
# in case we add a feature to rename media back to its original name. in
# the future we may want to zero this to save space
Column('description', UnicodeText, nullable=False, default=u""))
class Media(object):
pass
mapper(Media, mediaTable)
mediaDeletedTable = Table( mediaDeletedTable = Table(
'mediaDeleted', metadata, 'mediaDeleted', metadata,
@ -51,7 +39,7 @@ If a file with the same name exists, return a unique name.
This does not modify the media table.""" This does not modify the media table."""
# see if have duplicate contents # see if have duplicate contents
newpath = deck.db.scalar( newpath = deck.db.scalar(
"select filename from media where originalPath = :cs", "select filename from media where chksum = :cs",
cs=checksum(open(path, "rb").read())) cs=checksum(open(path, "rb").read()))
# check if this filename already exists # check if this filename already exists
if not newpath: if not newpath:
@ -88,7 +76,7 @@ def updateMediaCount(deck, file, count=1):
if deck.db.scalar( if deck.db.scalar(
"select 1 from media where filename = :file", file=file): "select 1 from media where filename = :file", file=file):
deck.db.statement( deck.db.statement(
"update media set size = size + :c, created = :t where filename = :file", "update media set refcnt = refcnt + :c, modified = :t where filename = :file",
file=file, c=count, t=time.time()) file=file, c=count, t=time.time())
elif count > 0: elif count > 0:
try: try:
@ -97,17 +85,17 @@ def updateMediaCount(deck, file, count=1):
except: except:
sum = u"" sum = u""
deck.db.statement(""" deck.db.statement("""
insert into media (id, filename, size, created, originalPath, description) insert into media (id, filename, refcnt, modified, chksum)
values (:id, :file, :c, :mod, :sum, '')""", values (:id, :file, :c, :mod, :sum)""",
id=genID(), file=file, c=count, mod=time.time(), id=genID(), file=file, c=count, mod=time.time(),
sum=sum) sum=sum)
def removeUnusedMedia(deck): def removeUnusedMedia(deck):
ids = deck.db.column0("select id from media where size = 0") ids = deck.db.column0("select id from media where refcnt = 0")
for id in ids: for id in ids:
deck.db.statement("insert into mediaDeleted values (:id, :t)", deck.db.statement("insert into mediaDeleted values (:id, :t)",
id=id, t=time.time()) id=id, t=time.time())
deck.db.statement("delete from media where size = 0") deck.db.statement("delete from media where refcnt = 0")
# String manipulation # String manipulation
########################################################################## ##########################################################################
@ -147,7 +135,7 @@ def rebuildMediaDir(deck, delete=False, dirty=True):
return (0, 0) return (0, 0)
deck.startProgress(title=_("Check Media DB")) deck.startProgress(title=_("Check Media DB"))
# set all ref counts to 0 # set all ref counts to 0
deck.db.statement("update media set size = 0") deck.db.statement("update media set refcnt = 0")
# look through cards for media references # look through cards for media references
refs = {} refs = {}
normrefs = {} normrefs = {}
@ -186,8 +174,8 @@ def rebuildMediaDir(deck, delete=False, dirty=True):
removeUnusedMedia(deck) removeUnusedMedia(deck)
# check md5s are up to date # check md5s are up to date
update = [] update = []
for (file, created, md5) in deck.db.all( for (file, md5) in deck.db.all(
"select filename, created, originalPath from media"): "select filename, chksum from media"):
path = os.path.join(mdir, file) path = os.path.join(mdir, file)
if not os.path.exists(path): if not os.path.exists(path):
if md5: if md5:
@ -199,12 +187,12 @@ def rebuildMediaDir(deck, delete=False, dirty=True):
update.append({'f':file, 'sum':sum, 'c':time.time()}) update.append({'f':file, 'sum':sum, 'c':time.time()})
if update: if update:
deck.db.statements(""" deck.db.statements("""
update media set originalPath = :sum, created = :c where filename = :f""", update media set chksum = :sum, modified = :c where filename = :f""",
update) update)
# update deck and get return info # update deck and get return info
if dirty: if dirty:
deck.flushMod() deck.flushMod()
nohave = deck.db.column0("select filename from media where originalPath = ''") nohave = deck.db.column0("select filename from media where chksum = ''")
deck.finishProgress() deck.finishProgress()
return (nohave, unused) return (nohave, unused)
@ -220,7 +208,7 @@ def downloadMissing(deck):
missing = 0 missing = 0
grabbed = 0 grabbed = 0
for c, (f, sum) in enumerate(deck.db.all( for c, (f, sum) in enumerate(deck.db.all(
"select filename, originalPath from media")): "select filename, chksum from media")):
path = os.path.join(mdir, f) path = os.path.join(mdir, f)
if not os.path.exists(path): if not os.path.exists(path):
try: try:

View file

@ -29,35 +29,45 @@ def upgradeSchema(engine, s):
except: except:
pass pass
if ver < 75: if ver < 75:
# migrate cards # cards
###########
moveTable(s, "cards") moveTable(s, "cards")
import cards import cards
metadata.create_all(engine, tables=[cards.cardsTable]) metadata.create_all(engine, tables=[cards.cardsTable])
# move data across
s.execute(""" s.execute("""
insert into cards select id, factId, cardModelId, created, modified, insert into cards select id, factId, cardModelId, created, modified,
question, answer, 0, ordinal, 0, relativeDelay, type, lastInterval, interval, question, answer, 0, ordinal, 0, relativeDelay, type, lastInterval, interval,
due, factor, reps, successive, noCount from cards2""") due, factor, reps, successive, noCount from cards2""")
s.execute("drop table cards2") s.execute("drop table cards2")
# migrate tags # tags
###########
moveTable(s, "tags") moveTable(s, "tags")
moveTable(s, "cardTags") moveTable(s, "cardTags")
initTagTables(s) initTagTables(s)
# move data across
s.execute("insert or ignore into tags select id, tag, 0 from tags2") s.execute("insert or ignore into tags select id, tag, 0 from tags2")
s.execute(""" s.execute("""
insert or ignore into cardTags select cardId, tagId, src from cardTags2""") insert or ignore into cardTags select cardId, tagId, src from cardTags2""")
s.execute("drop table tags2") s.execute("drop table tags2")
s.execute("drop table cardTags2") s.execute("drop table cardTags2")
# migrate facts # facts
###########
moveTable(s, "facts") moveTable(s, "facts")
import facts import facts
metadata.create_all(engine, tables=[facts.factsTable]) metadata.create_all(engine, tables=[facts.factsTable])
# move data across
s.execute(""" s.execute("""
insert or ignore into facts select id, modelId, created, modified, tags, insert or ignore into facts select id, modelId, created, modified, tags,
spaceUntil from facts2""") spaceUntil from facts2""")
s.execute("drop table facts2") s.execute("drop table facts2")
# media
###########
moveTable(s, "media")
import media
metadata.create_all(engine, tables=[media.mediaTable])
s.execute("""
insert or ignore into media select id, filename, size, created,
originalPath from media2""")
s.execute("drop table media2")
return ver return ver
def updateIndices(deck): def updateIndices(deck):
@ -84,14 +94,10 @@ create index if not exists ix_cards_factId on cards (factId)""")
deck.db.statement(""" deck.db.statement("""
create index if not exists ix_fields_factId on fields (factId)""") create index if not exists ix_fields_factId on fields (factId)""")
deck.db.statement(""" deck.db.statement("""
create index if not exists ix_fields_fieldModelId on fields (fieldModelId)""")
deck.db.statement("""
create index if not exists ix_fields_chksum on fields (chksum)""") create index if not exists ix_fields_chksum on fields (chksum)""")
# media # media
deck.db.statement(""" deck.db.statement("""
create unique index if not exists ix_media_filename on media (filename)""") create index if not exists ix_media_chksum on media (chksum)""")
deck.db.statement("""
create index if not exists ix_media_originalPath on media (originalPath)""")
# deletion tracking # deletion tracking
deck.db.statement(""" deck.db.statement("""
create index if not exists ix_cardsDeleted_cardId on cardsDeleted (cardId)""") create index if not exists ix_cardsDeleted_cardId on cardsDeleted (cardId)""")
@ -173,6 +179,8 @@ cast(min(thinkingTime, 60)*1000 as int), 0 from reviewHistory""")
deck.db.execute("update cards set queue=-1 where queue between -3 and -1") deck.db.execute("update cards set queue=-1 where queue between -3 and -1")
deck.db.execute("update cards set queue=-2 where queue between 3 and 5") deck.db.execute("update cards set queue=-2 where queue between 3 and 5")
deck.db.execute("update cards set queue=-3 where queue between 6 and 8") deck.db.execute("update cards set queue=-3 where queue between 6 and 8")
# don't need an index on fieldModelId
deck.db.statement("drop index if exists ix_fields_fieldModelId")
# new indices for new cards table # new indices for new cards table
updateIndices(deck) updateIndices(deck)
deck.version = 75 deck.version = 75

View file

@ -34,7 +34,7 @@ def test_copy():
assert m.copyToMedia(deck, path) == "foo.jpg" assert m.copyToMedia(deck, path) == "foo.jpg"
# dupe md5 # dupe md5
deck.db.statement(""" deck.db.statement("""
insert into media values (null, 'foo.jpg', 0, 0, :sum, '')""", insert into media values (null, 'foo.jpg', 0, 0, :sum)""",
sum=checksum("hello")) sum=checksum("hello"))
path = os.path.join(dir, "bar.jpg") path = os.path.join(dir, "bar.jpg")
open(path, "w").write("hello") open(path, "w").write("hello")
@ -54,53 +54,53 @@ def test_db():
deck.addFact(f) deck.addFact(f)
# 1 entry in the media db, with two references, and missing file # 1 entry in the media db, with two references, and missing file
assert deck.db.scalar("select count() from media") == 1 assert deck.db.scalar("select count() from media") == 1
assert deck.db.scalar("select size from media") == 2 assert deck.db.scalar("select refcnt from media") == 2
assert deck.db.scalar("select not originalPath from media") assert not deck.db.scalar("select group_concat(chksum, '') from media")
# copy to media folder & check db # copy to media folder & check db
path = m.copyToMedia(deck, path) path = m.copyToMedia(deck, path)
m.rebuildMediaDir(deck) m.rebuildMediaDir(deck)
# md5 should be set now # md5 should be set now
assert deck.db.scalar("select count() from media") == 1 assert deck.db.scalar("select count() from media") == 1
assert deck.db.scalar("select size from media") == 2 assert deck.db.scalar("select refcnt from media") == 2
assert deck.db.scalar("select originalPath from media") assert deck.db.scalar("select group_concat(chksum, '') from media")
# edit the fact to remove a reference # edit the fact to remove a reference
f['Back'] = u"" f['Back'] = u""
f.setModified(True, deck) f.setModified(True, deck)
deck.db.flush() deck.db.flush()
assert deck.db.scalar("select count() from media") == 1 assert deck.db.scalar("select count() from media") == 1
assert deck.db.scalar("select size from media") == 1 assert deck.db.scalar("select refcnt from media") == 1
# remove the front reference too # remove the front reference too
f['Front'] = u"" f['Front'] = u""
f.setModified(True, deck) f.setModified(True, deck)
assert deck.db.scalar("select size from media") == 0 assert deck.db.scalar("select refcnt from media") == 0
# add the reference back # add the reference back
f['Front'] = u"<img src='foo.jpg'>" f['Front'] = u"<img src='foo.jpg'>"
f.setModified(True, deck) f.setModified(True, deck)
assert deck.db.scalar("select size from media") == 1 assert deck.db.scalar("select refcnt from media") == 1
# detect file modifications # detect file modifications
oldsum = deck.db.scalar("select originalPath from media") oldsum = deck.db.scalar("select chksum from media")
open(path, "w").write("world") open(path, "w").write("world")
m.rebuildMediaDir(deck) m.rebuildMediaDir(deck)
newsum = deck.db.scalar("select originalPath from media") newsum = deck.db.scalar("select chksum from media")
assert newsum and newsum != oldsum assert newsum and newsum != oldsum
# delete underlying file and check db # delete underlying file and check db
os.unlink(path) os.unlink(path)
m.rebuildMediaDir(deck) m.rebuildMediaDir(deck)
# md5 should be gone again # md5 should be gone again
assert deck.db.scalar("select count() from media") == 1 assert deck.db.scalar("select count() from media") == 1
assert deck.db.scalar("select not originalPath from media") assert deck.db.scalar("select not chksum from media")
# media db should pick up media defined via templates & bulk update # media db should pick up media defined via templates & bulk update
f['Back'] = u"bar.jpg" f['Back'] = u"bar.jpg"
f.setModified(True, deck) f.setModified(True, deck)
deck.db.flush() deck.db.flush()
# modify template & regenerate # modify template & regenerate
assert deck.db.scalar("select count() from media") == 1 assert deck.db.scalar("select count() from media") == 1
assert deck.db.scalar("select sum(size) from media") == 1 assert deck.db.scalar("select sum(refcnt) from media") == 1
deck.currentModel.cardModels[0].aformat=u'<img src="{{{Back}}}">' deck.currentModel.cardModels[0].aformat=u'<img src="{{{Back}}}">'
deck.updateCardsFromModel(deck.currentModel) deck.updateCardsFromModel(deck.currentModel)
assert deck.db.scalar("select sum(size) from media") == 2 assert deck.db.scalar("select sum(refcnt) from media") == 2
assert deck.db.scalar("select count() from media") == 2 assert deck.db.scalar("select count() from media") == 2
deck.currentModel.cardModels[0].aformat=u'{{{Back}}}' deck.currentModel.cardModels[0].aformat=u'{{{Back}}}'
deck.updateCardsFromModel(deck.currentModel) deck.updateCardsFromModel(deck.currentModel)
assert deck.db.scalar("select count() from media") == 2 assert deck.db.scalar("select count() from media") == 2
assert deck.db.scalar("select sum(size) from media") == 1 assert deck.db.scalar("select sum(refcnt) from media") == 1