mirror of
https://github.com/ankitects/anki.git
synced 2025-09-21 15:32:23 -04:00
add change detection
I removed the media database in an earlier commit, but it's now necessary again as I decided to add native media syncing to AnkiWeb. This time, the DB is stored in the media folder rather than with the deck. This avoids sending it in a full sync and makes deck backups faster. The DB is a cache of file modtimes and checksums. When findChanges() is called, the code checks to see which files were added, changed or deleted since the last time, and updates the log of changes. Because the scanning step and the log retrieval are separate, it's possible to do the scanning in the background if the need arises. If the DB is deleted by the user, Anki will forget any deletions, and add all the files back to the DB the next time it's accessed. File changes are recorded as a delete + add. media.addFile() could be optimized in the future to log media added manually by the user, allowing us to skip the full directory scan in cases where the only changes were manually added media.
This commit is contained in:
parent
7e1df75cc2
commit
c59dd854fb
4 changed files with 160 additions and 17 deletions
124
anki/media.py
124
anki/media.py
|
@ -6,6 +6,7 @@ import os, shutil, re, urllib, urllib2, time, unicodedata, \
|
|||
urllib, sys, shutil
|
||||
from anki.utils import checksum, intTime, namedtmp, isWin
|
||||
from anki.lang import _
|
||||
from anki.db import DB
|
||||
|
||||
class MediaManager(object):
|
||||
|
||||
|
@ -15,14 +16,19 @@ class MediaManager(object):
|
|||
|
||||
def __init__(self, deck):
|
||||
self.deck = deck
|
||||
self._dir = None
|
||||
# media directory
|
||||
self._dir = re.sub("(?i)\.(anki)$", ".media", self.deck.path)
|
||||
if not os.path.exists(self._dir):
|
||||
os.makedirs(self._dir)
|
||||
os.chdir(self._dir)
|
||||
# change database
|
||||
path = os.path.join(self.dir(), "media.db")
|
||||
create = not os.path.exists(path)
|
||||
self.db = DB(path)
|
||||
if create:
|
||||
self._initDB()
|
||||
|
||||
def dir(self):
|
||||
if not self._dir:
|
||||
self._dir = re.sub("(?i)\.(anki)$", ".media", self.deck.path)
|
||||
if not os.path.exists(self._dir):
|
||||
os.makedirs(self._dir)
|
||||
os.chdir(self._dir)
|
||||
return self._dir
|
||||
|
||||
# Adding media
|
||||
|
@ -118,7 +124,7 @@ If the same name exists, compare checksums."""
|
|||
# loop through directory and find unused & missing media
|
||||
unused = []
|
||||
for file in os.listdir(mdir):
|
||||
if file.startswith("latex-"):
|
||||
if file.startswith("latex-") or file.startswith("media.db"):
|
||||
continue
|
||||
path = os.path.join(mdir, file)
|
||||
if not os.path.isfile(path):
|
||||
|
@ -145,3 +151,107 @@ If the same name exists, compare checksums."""
|
|||
for f in self.mediaFiles(p[type]):
|
||||
files.add(f)
|
||||
return files
|
||||
|
||||
# Tracking changes
|
||||
##########################################################################
|
||||
|
||||
def _initDB(self):
|
||||
# in the log, a mod time of zero indicates a delete
|
||||
self.db.executescript("""
|
||||
create table media (fname text primary key, csum text, mod int);
|
||||
create table meta (dirMod int);
|
||||
insert into meta values (0);
|
||||
create table log (id int, fname text, mod int);
|
||||
create index ix_log_id on log (id);
|
||||
""")
|
||||
|
||||
def _mtime(self, path):
|
||||
return int(os.stat(path).st_mtime)
|
||||
|
||||
def _checksum(self, path):
    "Return the checksum of the contents of the file at PATH."
    # close the handle explicitly rather than relying on garbage
    # collection; a directory scan may checksum many files in a row
    with open(path, "rb") as f:
        return checksum(f.read())
|
||||
|
||||
def changed(self):
    "Return dir mtime if it has changed since the last findChanges()"
    # doesn't track edits, but user can add or remove a file to update
    recorded = self.db.scalar("select dirMod from meta")
    current = self._mtime(self.dir())
    # no recorded value yet, or the folder's mtime has moved on
    if not recorded or recorded != current:
        return current
    return False
|
||||
|
||||
def findChanges(self):
    "Scan the media folder if it's changed, and note any changes."
    if not self.changed():
        # folder mtime matches the recorded one; nothing to rescan
        return
    self._logChanges()
|
||||
|
||||
def changesSince(self, mod):
    """Return a list of added and removed files since MOD time.

    The folder is rescanned first if needed. Log rows whose id is greater
    than MOD are then collected: rows with a non-zero mod time count as
    additions, rows with a zero mod time as removals. Returns a pair
    (added, removed), where added holds (fname, mtime) items and removed
    holds file names.
    """
    self.findChanges()
    added = {}
    removed = {}
    rows = self.db.all("select fname, mod from log where id > ?", mod)
    # loop through and collect changes, removing duplicates
    for fname, mtime in rows:
        target = added if mtime else removed
        target[fname] = mtime
    return added.items(), removed.keys()
|
||||
|
||||
def _changes(self):
|
||||
self.cache = {}
|
||||
for (name, csum, mod) in self.db.execute(
|
||||
"select * from media"):
|
||||
self.cache[name] = [csum, mod, False]
|
||||
added = []
|
||||
removed = []
|
||||
changed = []
|
||||
# loop through on-disk files
|
||||
for f in os.listdir(self.dir()):
|
||||
# ignore our db and folders
|
||||
if f.startswith("media.db") or os.path.isdir(f):
|
||||
continue
|
||||
# newly added?
|
||||
if f not in self.cache:
|
||||
added.append(f)
|
||||
else:
|
||||
# modified since last time?
|
||||
if self._mtime(f) != self.cache[f][1]:
|
||||
# and has different checksum?
|
||||
if self._checksum(f) != self.cache[f][0]:
|
||||
changed.append(f)
|
||||
# mark as used
|
||||
self.cache[f][2] = True
|
||||
# look for any entries in the cache that no longer exist on disk
|
||||
for (k, v) in self.cache.items():
|
||||
if not v[2]:
|
||||
removed.append(k)
|
||||
return added, changed, removed
|
||||
|
||||
def _logChanges(self):
    """Scan for changes and record them in the media cache and the log.

    Added files get a cache row and a log row carrying their mtime.
    Changed files are logged as a delete (mod 0) followed by an add.
    Removed files are dropped from the cache and logged with mod 0.
    Finally the folder's current mtime is stored in meta.dirMod.
    """
    (added, changed, removed) = self._changes()
    mdir = self.dir()
    log = []
    media = []
    mediaRem = []
    t = intTime()
    for f in added:
        # resolve against the media folder so the result does not depend
        # on the process's current working directory
        path = os.path.join(mdir, f)
        mt = self._mtime(path)
        media.append((f, self._checksum(path), mt))
        log.append((t, f, mt))
    for f in changed:
        path = os.path.join(mdir, f)
        mt = self._mtime(path)
        media.append((f, self._checksum(path), mt))
        # a modification is recorded as delete + add
        log.append((t, f, 0))
        log.append((t, f, mt))
    for f in removed:
        mediaRem.append((f,))
        log.append((t, f, 0))
    # update db
    self.db.executemany("insert or replace into media values (?,?,?)",
                        media)
    self.db.executemany("insert into log values (?,?,?)", log)
    if mediaRem:
        self.db.executemany("delete from media where fname = ?",
                            mediaRem)
    self.db.execute("update meta set dirMod = ?", self._mtime(mdir))
|
||||
|
|
|
@ -4,14 +4,7 @@
|
|||
|
||||
import re, os, random, time, types, math, htmlentitydefs, subprocess, \
|
||||
tempfile, shutil
|
||||
|
||||
try:
|
||||
import hashlib
|
||||
md5 = hashlib.md5
|
||||
except ImportError:
|
||||
import md5
|
||||
md5 = md5.new
|
||||
|
||||
from hashlib import md5, sha1
|
||||
from anki.lang import _, ngettext
|
||||
import locale, sys
|
||||
|
||||
|
@ -204,6 +197,9 @@ def splitFields(string):
|
|||
def checksum(data):
    "Return the hexadecimal MD5 digest of DATA."
    hasher = md5()
    hasher.update(data)
    return hasher.hexdigest()
|
||||
|
||||
def sha1sum(data):
    "Return the hexadecimal SHA-1 digest of DATA."
    hasher = sha1()
    hasher.update(data)
    return hasher.hexdigest()
|
||||
|
||||
def fieldChecksum(data):
    "Return a 32 bit checksum of the text DATA, encoded as UTF-8."
    encoded = data.encode("utf-8")
    # 32 bit unsigned number from first 8 digits of md5 hash
    prefix = checksum(encoded)[:8]
    return int(prefix, 16)
|
||||
|
|
|
@ -14,8 +14,8 @@ def test_latex():
|
|||
f = d.newFact()
|
||||
f['Front'] = u"[latex]hello[/latex]"
|
||||
d.addFact(f)
|
||||
# but since latex couldn't run, it will be empty
|
||||
assert len(os.listdir(d.media.dir())) == 0
|
||||
# but since latex couldn't run, it will only have the media.db
|
||||
assert len(os.listdir(d.media.dir())) == 1
|
||||
# check the error message
|
||||
msg = f.cards()[0].q()
|
||||
assert "executing latex" in msg
|
||||
|
|
|
@ -59,3 +59,40 @@ def test_deckIntegration():
|
|||
ret = d.media.check()
|
||||
assert ret[0] == ["fake2.png"]
|
||||
assert ret[1] == ["foo.jpg"]
|
||||
|
||||
def test_changes():
    "Exercise the media change log: additions, edits and deletions."
    d = getEmptyDeck()
    assert d.media.changed()
    add, rem = d.media.changesSince(0)
    assert not add
    assert not rem
    assert not d.media.changed()
    # add a file
    tmpdir = tempfile.mkdtemp(prefix="anki")
    path = os.path.join(tmpdir, "foo.jpg")
    # close each file explicitly so the data is on disk before scanning
    with open(path, "w") as f:
        f.write("hello")
    time.sleep(1)
    path = d.media.addFile(path)
    # should have been logged
    add, rem = d.media.changesSince(0)
    assert add
    assert not rem
    mod = add[0][1]
    # if we modify it, the cache won't notice
    time.sleep(1)
    with open(path, "w") as f:
        f.write("world")
    add, rem = d.media.changesSince(0)
    assert len(add) == 1
    # but if we add another file, it will
    time.sleep(1)
    with open(path+"2", "w") as f:
        f.write("yo")
    add, rem = d.media.changesSince(0)
    assert len(add) == 2
    assert len(rem) == 1
    assert add[0][1] != mod
    assert add[0][0] == "foo.jpg"
    # deletions should get noticed too
    time.sleep(1)
    os.unlink(path+"2")
    add, rem = d.media.changesSince(0)
    assert len(add) == 2
    assert len(rem) == 2
|
||||
|
||||
|
|
Loading…
Reference in a new issue