refactor media change logging

This commit is contained in:
Damien Elmes 2011-09-24 17:16:49 +09:00
parent 9fdfac722d
commit 22df2790f9
4 changed files with 64 additions and 72 deletions

View file

@ -29,6 +29,10 @@ REM_GROUP = 2
COUNT_ANSWERED = 0 COUNT_ANSWERED = 0
COUNT_REMAINING = 1 COUNT_REMAINING = 1
# media log
MEDIA_ADD = 0
MEDIA_REM = 1
# Labels # Labels
########################################################################## ##########################################################################

View file

@ -7,6 +7,7 @@ import os, shutil, re, urllib, urllib2, time, unicodedata, \
from anki.utils import checksum, intTime, namedtmp, isWin from anki.utils import checksum, intTime, namedtmp, isWin
from anki.lang import _ from anki.lang import _
from anki.db import DB from anki.db import DB
from anki.consts import *
class MediaManager(object): class MediaManager(object):
@ -25,7 +26,7 @@ class MediaManager(object):
self.connect() self.connect()
def connect(self): def connect(self):
path = self.dir()+"db" path = self.dir()+".db"
create = not os.path.exists(path) create = not os.path.exists(path)
self.db = DB(path) self.db = DB(path)
if create: if create:
@ -159,17 +160,29 @@ If the same name exists, compare checksums."""
files.add(f) files.add(f)
return files return files
# Tracking changes # Tracking changes (public)
##########################################################################
def added(self):
self.findChanges()
return self.db.execute("select * from log where type = ?", MEDIA_ADD)
def removed(self):
self.findChanges()
return self.db.execute("select * from log where type = ?", MEDIA_REM)
def clearLog(self):
self.db.execute("delete from log")
# Tracking changes (private)
########################################################################## ##########################################################################
def _initDB(self): def _initDB(self):
# in the log, a mod time of zero indicates a delete # in the log, a mod time of zero indicates a delete
self.db.executescript(""" self.db.executescript("""
create table media (fname text primary key, csum text, mod int); create table media (fname text primary key, csum text, mod int);
create table meta (dirMod int, inSync int); create table meta (dirMod int); insert into meta values (0);
insert into meta values (0, 0); create table log (fname text primary key, type int);
create table log (id int, fname text, mod int);
create index ix_log_id on log (id);
""") """)
def _mtime(self, path): def _mtime(self, path):
@ -178,7 +191,7 @@ create index ix_log_id on log (id);
def _checksum(self, path): def _checksum(self, path):
return checksum(open(path, "rb").read()) return checksum(open(path, "rb").read())
def changed(self): def _changed(self):
"Return dir mtime if it has changed since the last findChanges()" "Return dir mtime if it has changed since the last findChanges()"
# doesn't track edits, but user can add or remove a file to update # doesn't track edits, but user can add or remove a file to update
mod = self.db.scalar("select dirMod from meta") mod = self.db.scalar("select dirMod from meta")
@ -189,22 +202,31 @@ create index ix_log_id on log (id);
def findChanges(self): def findChanges(self):
"Scan the media folder if it's changed, and note any changes." "Scan the media folder if it's changed, and note any changes."
if self.changed(): if self._changed():
self._logChanges() self._logChanges()
def changesSince(self, mod): def _logChanges(self):
"Return a list of added and removed files since MOD time." (added, removed) = self._changes()
self.findChanges() log = []
added = {} media = []
removed = {} mediaRem = []
# loop through and collect changes, removing duplicates for f in added:
for fname, mod in self.db.all( mt = self._mtime(f)
"select fname, mod from log where id > ?", mod): media.append((f, self._checksum(f), mt))
if mod: log.append((f, MEDIA_ADD))
added[fname] = mod for f in removed:
else: mediaRem.append((f,))
removed[fname] = mod log.append((f, MEDIA_REM))
return added.items(), removed.keys() # update media db
self.db.executemany("insert or replace into media values (?,?,?)",
media)
if mediaRem:
self.db.executemany("delete from media where fname = ?",
mediaRem)
self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
# and logs
self.db.executemany("insert or replace into log values (?,?)", log)
self.db.commit()
def _changes(self): def _changes(self):
self.cache = {} self.cache = {}
@ -213,7 +235,6 @@ create index ix_log_id on log (id);
self.cache[name] = [csum, mod, False] self.cache[name] = [csum, mod, False]
added = [] added = []
removed = [] removed = []
changed = []
# loop through on-disk files # loop through on-disk files
for f in os.listdir(self.dir()): for f in os.listdir(self.dir()):
# ignore folders # ignore folders
@ -227,40 +248,11 @@ create index ix_log_id on log (id);
if self._mtime(f) != self.cache[f][1]: if self._mtime(f) != self.cache[f][1]:
# and has different checksum? # and has different checksum?
if self._checksum(f) != self.cache[f][0]: if self._checksum(f) != self.cache[f][0]:
changed.append(f) added.append(f)
# mark as used # mark as used
self.cache[f][2] = True self.cache[f][2] = True
# look for any entries in the cache that no longer exist on disk # look for any entries in the cache that no longer exist on disk
for (k, v) in self.cache.items(): for (k, v) in self.cache.items():
if not v[2]: if not v[2]:
removed.append(k) removed.append(k)
return added, changed, removed return added, removed
def _logChanges(self):
(added, changed, removed) = self._changes()
log = []
media = []
mediaRem = []
t = intTime()
for f in added:
mt = self._mtime(f)
media.append((f, self._checksum(f), mt))
log.append((t, f, mt))
for f in changed:
mt = self._mtime(f)
media.append((f, self._checksum(f), mt))
log.append((t, f, 0))
log.append((t, f, mt))
for f in removed:
mediaRem.append((f,))
log.append((t, f, 0))
# update db
self.db.executemany("insert or replace into media values (?,?,?)",
media)
self.db.executemany("insert into log values (?,?,?)", log)
if mediaRem:
self.db.executemany("delete from media where fname = ?",
mediaRem)
self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
self.db.commit()

View file

@ -20,7 +20,6 @@ MIME_BOUNDARY = "Anki-sync-boundary"
SYNC_HOST = os.environ.get("SYNC_HOST") or "dev.ankiweb.net" SYNC_HOST = os.environ.get("SYNC_HOST") or "dev.ankiweb.net"
SYNC_PORT = int(os.environ.get("SYNC_PORT") or 80) SYNC_PORT = int(os.environ.get("SYNC_PORT") or 80)
SYNC_URL = "http://%s:%d/sync/" % (SYNC_HOST, SYNC_PORT) SYNC_URL = "http://%s:%d/sync/" % (SYNC_HOST, SYNC_PORT)
KEYS = ("models", "facts", "cards", "media")
# todo: # todo:
# - ensure all urllib references are converted to urllib2 for proxies # - ensure all urllib references are converted to urllib2 for proxies

View file

@ -62,10 +62,9 @@ def test_deckIntegration():
def test_changes(): def test_changes():
d = getEmptyDeck() d = getEmptyDeck()
assert d.media.changed() assert d.media._changed()
add, rem = d.media.changesSince(0) assert not list(d.media.added())
assert not add; assert not rem assert not list(d.media.removed())
assert not d.media.changed()
# add a file # add a file
dir = tempfile.mkdtemp(prefix="anki") dir = tempfile.mkdtemp(prefix="anki")
path = os.path.join(dir, "foo.jpg") path = os.path.join(dir, "foo.jpg")
@ -73,26 +72,24 @@ def test_changes():
time.sleep(1) time.sleep(1)
path = d.media.addFile(path) path = d.media.addFile(path)
# should have been logged # should have been logged
add, rem = d.media.changesSince(0) assert list(d.media.added())
assert add; assert not rem assert not list(d.media.removed())
mod = add[0][1]
# if we modify it, the cache won't notice # if we modify it, the cache won't notice
time.sleep(1) time.sleep(1)
open(path, "w").write("world") open(path, "w").write("world")
add, rem = d.media.changesSince(0) assert len(list(d.media.added())) == 1
assert len(add) == 1 assert not list(d.media.removed())
# but if we add another file, it will # but if we add another file, it will
time.sleep(1) time.sleep(1)
open(path+"2", "w").write("yo") open(path+"2", "w").write("yo")
add, rem = d.media.changesSince(0) assert len(list(d.media.added())) == 2
assert len(add) == 2 assert not list(d.media.removed())
assert len(rem) == 1
assert add[0][1] != mod
assert add[0][0] == "foo.jpg"
# deletions should get noticed too # deletions should get noticed too
time.sleep(1) time.sleep(1)
os.unlink(path+"2") os.unlink(path+"2")
add, rem = d.media.changesSince(0) assert len(list(d.media.added())) == 1
assert len(add) == 2 assert len(list(d.media.removed())) == 1
assert len(rem) == 2 # after a sync completes, it clears the log
d.media.clearLog()
assert len(list(d.media.added())) == 0
assert len(list(d.media.removed())) == 0