initial work on media syncing

This commit is contained in:
Damien Elmes 2011-10-03 12:45:08 +09:00
parent a8d2578be5
commit 5da3bba1df
6 changed files with 267 additions and 39 deletions

View file

@ -2,6 +2,8 @@
# Copyright: Damien Elmes <anki@ichi2.net> # Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import os
# whether new cards should be mixed with reviews, or shown first or last # whether new cards should be mixed with reviews, or shown first or last
NEW_CARDS_DISTRIBUTE = 0 NEW_CARDS_DISTRIBUTE = 0
NEW_CARDS_LAST = 1 NEW_CARDS_LAST = 1
@ -33,6 +35,15 @@ COUNT_REMAINING = 1
MEDIA_ADD = 0 MEDIA_ADD = 0
MEDIA_REM = 1 MEDIA_REM = 1
# syncing vars
# max payload of a single media-sync zip before a new one is started
SYNC_ZIP_SIZE = 10*1024*1024
# bytes read per iteration when streaming request bodies
CHUNK_SIZE = 65536
# multipart boundary used for form-data posts to the sync server
MIME_BOUNDARY = "Anki-sync-boundary"
# host/port default to the dev server; overridable via environment
SYNC_HOST = os.environ.get("SYNC_HOST") or "dev.ankiweb.net"
SYNC_PORT = int(os.environ.get("SYNC_PORT") or 80)
SYNC_URL = "http://%s:%d/sync/" % (SYNC_HOST, SYNC_PORT)
# protocol version reported to the server
SYNC_VER = 0
# Labels # Labels
########################################################################## ##########################################################################

View file

@ -68,3 +68,13 @@ class DB(object):
def set_progress_handler(self, *args): def set_progress_handler(self, *args):
self._db.set_progress_handler(*args) self._db.set_progress_handler(*args)
def __enter__(self):
    # open an explicit transaction; __exit__ commits it on clean exit
    self._db.execute("begin")
    return self
def __exit__(self, exc_type, *args):
    if not exc_type:
        # no exception, so commit
        self._db.commit()
    # close unconditionally; on an exception the uncommitted transaction
    # is discarded. NOTE(review): the DB is unusable after the with-block
    # -- presumably callers open a fresh DB each time; verify.
    self._db.close()

View file

@ -3,7 +3,8 @@
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import os, shutil, re, urllib, urllib2, time, unicodedata, \ import os, shutil, re, urllib, urllib2, time, unicodedata, \
urllib, sys, shutil urllib, sys, shutil, simplejson, zipfile
from cStringIO import StringIO
from anki.utils import checksum, intTime, namedtmp, isWin from anki.utils import checksum, intTime, namedtmp, isWin
from anki.lang import _ from anki.lang import _
from anki.db import DB from anki.db import DB
@ -169,7 +170,7 @@ If the same name exists, compare checksums."""
def removed(self): def removed(self):
self.findChanges() self.findChanges()
return self.db.execute("select * from log where type = ?", MEDIA_REM) return self.db.list("select * from log where type = ?", MEDIA_REM)
def clearLog(self): def clearLog(self):
self.db.execute("delete from log") self.db.execute("delete from log")
@ -181,7 +182,7 @@ If the same name exists, compare checksums."""
# in the log, a mod time of zero indicates a delete # in the log, a mod time of zero indicates a delete
self.db.executescript(""" self.db.executescript("""
create table media (fname text primary key, csum text, mod int); create table media (fname text primary key, csum text, mod int);
create table meta (dirMod int); insert into meta values (0); create table meta (dirMod int, usn int); insert into meta values (0, 0);
create table log (fname text primary key, type int); create table log (fname text primary key, type int);
""") """)
@ -191,6 +192,15 @@ create table log (fname text primary key, type int);
def _checksum(self, path): def _checksum(self, path):
return checksum(open(path, "rb").read()) return checksum(open(path, "rb").read())
def usn(self):
    "Update sequence number recorded at the last successful media sync."
    current = self.db.scalar("select usn from meta")
    return current
def setUsn(self, usn):
    "Record usn as the server-acknowledged media sync point."
    sql = "update meta set usn = ?"
    self.db.execute(sql, usn)
def syncMod(self):
    "Stamp meta with the media dir's current mtime so the next scan is skipped."
    mtime = self._mtime(self.dir())
    self.db.execute("update meta set dirMod = ?", mtime)
def _changed(self): def _changed(self):
"Return dir mtime if it has changed since the last findChanges()" "Return dir mtime if it has changed since the last findChanges()"
# doesn't track edits, but user can add or remove a file to update # doesn't track edits, but user can add or remove a file to update
@ -256,3 +266,95 @@ create table log (fname text primary key, type int);
if not v[2]: if not v[2]:
removed.append(k) removed.append(k)
return added, removed return added, removed
# Adding/removing files in media sync
##########################################################################
def syncRemove(self, fnames):
    """Delete server-removed files and drop them from the log/media tables.

    fnames are server-supplied names relative to the media folder (the
    desktop code assumes the cwd is the media folder). Missing files are
    skipped, but the db rows are always cleared."""
    for f in fnames:
        # refuse anything resembling a path; mirrors the malicious-char
        # check in syncAdd, which this method previously lacked
        for c in '/\\':
            assert c not in f
        if os.path.exists(f):
            os.unlink(f)
        self.db.execute("delete from log where fname = ?", f)
        self.db.execute("delete from media where fname = ?", f)
def syncAdd(self, zipData):
    """Extract zip data; return True if this was the final zip in the set.

    zipData is the raw bytes of a sync zip: numbered file members, a
    _meta json index mapping member name -> {name, mod}, and an optional
    _finished marker on the last zip of the set. Files are written into
    the cwd, which is assumed to be the media folder."""
    f = StringIO(zipData)
    z = zipfile.ZipFile(f, "r")
    finished = False
    meta = None
    media = []
    # running total of extracted sizes, to reject zip bombs
    sizecnt = 0
    # get meta info first
    assert z.getinfo("_meta").file_size < 100000
    meta = simplejson.loads(z.read("_meta"))
    # then loop through all files
    for i in z.infolist():
        # check for zip bombs
        sizecnt += i.file_size
        assert sizecnt < 100*1024*1024
        if i.filename == "_meta":
            # ignore previously-retrieved meta
            continue
        elif i.filename == "_finished":
            # last zip in set
            finished = True
        else:
            # prepare sql
            data = z.read(i)
            csum = checksum(data)
            mod = meta[i.filename]['mod']
            name = meta[i.filename]['name']
            # malicious chars? refuse anything resembling a path
            for c in '/\\':
                assert c not in name
            media.append((name, csum, mod))
            # remove entries from local log
            self.db.execute("delete from log where fname = ?", name)
            # save file
            open(name, "wb").write(data)
            # set mod time if possible; may fail on some filesystems.
            # was a bare except, which also swallowed KeyboardInterrupt etc.
            try:
                os.utime(name, (mod, mod))
            except OSError:
                # best-effort only; a wrong mtime just triggers a rescan
                print("failed to set utime")
    # update media db
    if media:
        self.db.executemany(
            "insert or replace into media values (?,?,?)", media)
    # if we have finished adding, we need to record the new folder mtime
    # so that we don't trigger a needless scan
    if finished:
        self.syncMod()
    # also need to clear log after sync finished
    return finished
# Streaming zips
##########################################################################
# Because there's no standard filename encoding for zips, and because not
# all zip clients support retrieving mtime, we store the files as ascii
# and place a json file in the zip with the necessary information.
def zipFromAdded(self, cur):
    """Add files to a zip until over SYNC_ZIP_SIZE. Return zip data.

    Members are named "0", "1", ... with a _meta json index carrying the
    real filename and mtime (zip filename encoding/mtime support is not
    portable). When the cursor is exhausted a _finished marker is added.
    NOTE(review): assumes cur.fetchone() yields a bare filename -- verify
    against the caller's cursor shape."""
    f = StringIO()
    z = zipfile.ZipFile(f, "w")
    sz = 0
    cnt = 0
    files = {}
    while 1:
        fname = cur.fetchone()
        if not fname:
            # nothing left; this is the last zip of the set
            z.writestr("_finished", "")
            break
        z.write(fname, str(cnt))
        # fix: was files[str(c)] -- NameError, c is undefined
        files[str(cnt)] = dict(
            name=fname, mod=self._mtime(fname))
        sz += os.path.getsize(fname)
        if sz > SYNC_ZIP_SIZE:
            break
        cnt += 1
    z.writestr("_meta", simplejson.dumps(files))
    z.close()
    return f.getvalue()

View file

@ -78,6 +78,8 @@ mplayerReader = None
mplayerEvt = threading.Event() mplayerEvt = threading.Event()
mplayerClear = False mplayerClear = False
# fixme from robert: can we do away with this with stderr=file(os.devnull,
# 'w') in the popen call?
class MplayerReader(threading.Thread): class MplayerReader(threading.Thread):
"Read any debugging info to prevent mplayer from blocking." "Read any debugging info to prevent mplayer from blocking."

View file

@ -2,7 +2,7 @@
# Copyright: Damien Elmes <anki@ichi2.net> # Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import urllib, simplejson, os, sys, httplib2, zipfile, gzip import urllib, simplejson, os, sys, httplib2, gzip
from cStringIO import StringIO from cStringIO import StringIO
from datetime import date from datetime import date
from anki.db import DB from anki.db import DB
@ -15,13 +15,6 @@ from hooks import runHook
if simplejson.__version__ < "1.7.3": if simplejson.__version__ < "1.7.3":
raise Exception("SimpleJSON must be 1.7.3 or later.") raise Exception("SimpleJSON must be 1.7.3 or later.")
CHUNK_SIZE = 65536
MIME_BOUNDARY = "Anki-sync-boundary"
SYNC_HOST = os.environ.get("SYNC_HOST") or "dev.ankiweb.net"
SYNC_PORT = int(os.environ.get("SYNC_PORT") or 80)
SYNC_URL = "http://%s:%d/sync/" % (SYNC_HOST, SYNC_PORT)
SYNC_VER = 0
# - 64 bit guid will be munged in js; need to escape or rethink # - 64 bit guid will be munged in js; need to escape or rethink
# - make sure /sync/download is compressed # - make sure /sync/download is compressed
@ -34,16 +27,13 @@ SYNC_VER = 0
# changed, since by default closing the deck bumps the mod time # changed, since by default closing the deck bumps the mod time
# - ensure the user doesn't add foreign chars to password # - ensure the user doesn't add foreign chars to password
# Incremental syncing
########################################################################## ##########################################################################
from anki.consts import * from anki.consts import *
class Syncer(object): class Syncer(object):
MAX_REVLOG = 5000
MAX_CARDS = 5000
MAX_FACTS = 2500
def __init__(self, deck, server=None): def __init__(self, deck, server=None):
self.deck = deck self.deck = deck
self.server = server self.server = server
@ -383,6 +373,7 @@ from facts where %s""" % d)
########################################################################## ##########################################################################
class LocalServer(Syncer): class LocalServer(Syncer):
# serialize/deserialize payload, so we don't end up sharing objects # serialize/deserialize payload, so we don't end up sharing objects
# between decks # between decks
def applyChanges(self, minUsn, lnewer, changes): def applyChanges(self, minUsn, lnewer, changes):
@ -404,6 +395,9 @@ class HttpSyncer(object):
self.hkey = cont self.hkey = cont
return cont return cont
def _vars(self):
return dict(k=self.hkey)
# Posting data as a file # Posting data as a file
###################################################################### ######################################################################
# We don't want to post the payload as a form var, as the percent-encoding is # We don't want to post the payload as a form var, as the percent-encoding is
@ -422,23 +416,24 @@ class HttpSyncer(object):
'Content-Disposition: form-data; name="%s"\r\n\r\n%s\r\n' % 'Content-Disposition: form-data; name="%s"\r\n\r\n%s\r\n' %
(key, value)) (key, value))
# file header # file header
buf.write(bdry + "\r\n") if fobj:
buf.write("""\ buf.write(bdry + "\r\n")
buf.write("""\
Content-Disposition: form-data; name="data"; filename="data"\r\n\ Content-Disposition: form-data; name="data"; filename="data"\r\n\
Content-Type: application/octet-stream\r\n\r\n""") Content-Type: application/octet-stream\r\n\r\n""")
# write file into buffer, optionally compressing # write file into buffer, optionally compressing
if comp: if comp:
tgt = gzip.GzipFile(mode="wb", fileobj=buf, compresslevel=comp) tgt = gzip.GzipFile(mode="wb", fileobj=buf, compresslevel=comp)
else: else:
tgt = buf tgt = buf
while 1: while 1:
data = fobj.read(CHUNK_SIZE) data = fobj.read(CHUNK_SIZE)
if not data: if not data:
if comp: if comp:
tgt.close() tgt.close()
break break
tgt.write(data) tgt.write(data)
buf.write('\r\n' + bdry + '--\r\n') buf.write('\r\n' + bdry + '--\r\n')
size = buf.tell() size = buf.tell()
# connection headers # connection headers
headers = { headers = {
@ -453,7 +448,11 @@ Content-Type: application/octet-stream\r\n\r\n""")
raise Exception("Invalid response code: %s" % resp['status']) raise Exception("Invalid response code: %s" % resp['status'])
return cont return cont
# Incremental sync over HTTP
######################################################################
class RemoteServer(Syncer, HttpSyncer): class RemoteServer(Syncer, HttpSyncer):
def __init__(self, user, hkey): def __init__(self, user, hkey):
self.user = user self.user = user
self.hkey = hkey self.hkey = hkey
@ -490,9 +489,6 @@ class RemoteServer(Syncer, HttpSyncer):
def finish(self, **kw): def finish(self, **kw):
return self._run("finish", kw) return self._run("finish", kw)
def _vars(self):
return dict(k=self.hkey)
def _run(self, cmd, data): def _run(self, cmd, data):
return simplejson.loads( return simplejson.loads(
self.postData(self.con, cmd, StringIO(simplejson.dumps(data)), self.postData(self.con, cmd, StringIO(simplejson.dumps(data)),
@ -507,9 +503,6 @@ class FullSyncer(HttpSyncer):
self.deck = deck self.deck = deck
self.hkey = hkey self.hkey = hkey
def _vars(self):
return dict(k=self.hkey)
def _con(self): def _con(self):
return httplib2.Http(timeout=60) return httplib2.Http(timeout=60)
@ -535,7 +528,78 @@ class FullSyncer(HttpSyncer):
# Media syncing # Media syncing
########################################################################## ##########################################################################
class MediaSyncer(HttpSyncer): class MediaSyncer(object):
pass
def __init__(self, deck, server=None):
    # deck: local collection whose media folder we sync
    # server: peer exposing remove()/files()/addFiles(); None when this
    # instance itself acts as the server side
    self.deck = deck
    self.server = server
    # cursor of locally-added files; filled lazily by files()
    self.added = None
def sync(self):
    """Run a full media sync against self.server.

    Three phases: exchange deletions, stream new files from the server,
    then stream new files to the server. The server signals completion
    of the upload phase by returning a bumped usn, which we record
    before clearing the local log."""
    # step 1: send/recv deletions
    runHook("mediaSync", "remove")
    usn = self.deck.media.usn()
    lrem = self.removed()
    rrem = self.server.remove(fnames=lrem, minUsn=usn)
    self.remove(rrem)
    # step 2: stream files from server
    runHook("mediaSync", "server")
    while 1:
        runHook("mediaSync", "stream")
        # renamed from 'zip', which shadowed the builtin
        zipData = self.server.files()
        if self.addFiles(zip=zipData):
            break
    # step 3: stream files to the server
    runHook("mediaSync", "client")
    while 1:
        runHook("mediaSync", "stream")
        zipData = self.files()
        usn = self.server.addFiles(zip=zipData)
        if usn:
            # when server has run out of files, it returns bumped usn
            break
    self.deck.media.setUsn(usn)
    self.deck.media.clearLog()
    # fixme: need to commit to media db? or is already in that commit mode?
def removed(self):
    "Filenames deleted locally since the last sync, per the media log."
    media = self.deck.media
    return media.removed()
def remove(self, fnames, minUsn=None):
    "Delete fnames locally, then report our own removals back to the peer."
    media = self.deck.media
    media.syncRemove(fnames)
    if minUsn is not None:
        # we're the server; remember the client's usn floor
        self.minUsn = minUsn
    return media.removed()
def files(self):
    "Return zip data for the next batch of locally-added files."
    media = self.deck.media
    if not self.added:
        # lazily fetch and cache the added-files cursor across calls
        self.added = media.added()
    return media.zipFromAdded(self.added)
def addFiles(self, zip):
    "True if zip is the last in set. Server returns new usn instead."
    # extraction itself is handled by the media manager
    media = self.deck.media
    return media.syncAdd(zip)
# Remote media syncing
##########################################################################
class RemoteMediaServer(MediaSyncer, HttpSyncer):
def __init__(self, hkey):
    # hkey: host key from a prior authentication; sent with every request
    self.hkey = hkey
    # generous timeout -- media zips may take a while to transfer
    self.con = httplib2.Http(timeout=60)
def remove(self, **kw):
    "Post local deletions to the server; returns its deletions for us."
    payload = StringIO(simplejson.dumps(kw))
    resp = self.postData(self.con, "remove", payload, self._vars())
    return simplejson.loads(resp)
def files(self):
    "Fetch the next zip of server-side files (raw bytes)."
    # NOTE(review): same endpoint name as addFiles -- presumably the
    # server dispatches on the presence of a data payload; verify
    return self.postData(self.con, "files", None, self._vars())
def addFiles(self, zip):
    "Upload one zip of local files; server replies with a usn once done."
    payload = StringIO(zip)
    # comp=0: zip members are already deflated, don't gzip again
    return self.postData(self.con, "files", payload,
                         self._vars(), comp=0)

View file

@ -6,7 +6,8 @@ from tests.shared import assertException
from anki.errors import * from anki.errors import *
from anki import Deck from anki import Deck
from anki.utils import intTime from anki.utils import intTime
from anki.sync import Syncer, FullSyncer, LocalServer, RemoteServer from anki.sync import Syncer, FullSyncer, LocalServer, RemoteServer, \
MediaSyncer, RemoteMediaServer
from anki.facts import Fact from anki.facts import Fact
from anki.cards import Card from anki.cards import Card
from tests.shared import getEmptyDeck from tests.shared import getEmptyDeck
@ -322,3 +323,41 @@ def test_remoteSync():
f.download() f.download()
d = Deck(client.deck.path) d = Deck(client.deck.path)
assert d.mod == lmod assert d.mod == lmod
# Media tests
##########################################################################
# We can't run many tests for local media, because the desktop code assumes
# the current directory is the media folder
def setup_media():
    # local<->local media sync fixture: deck2 plays the server role
    global client, server
    setup_basic()
    server = MediaSyncer(deck2)
    client = MediaSyncer(deck1, server)
@nose.with_setup(setup_media)
def test_mediaNothing():
    # syncing two empty media folders should be a clean no-op
    client.sync()
# Remote media tests
##########################################################################
def setup_remoteMedia():
    # fixture for syncing against the real test server
    # (needs TEST_HKEY and network access)
    global client, server
    setup_basic()
    server = RemoteMediaServer(TEST_HKEY)
    client = MediaSyncer(deck1, server)
@nose.with_setup(setup_remoteMedia)
def test_remoteMediaNothing():
    # empty local media folder: sync should complete without transfers
    client.sync()
# @nose.with_setup(setup_media)
# def test_mediaAdd():
# open(os.path.join(deck1.media.dir(), "foo.jpg"), "wb").write("foo")
# assert len(os.listdir(deck1.media.dir())) == 1
# assert len(os.listdir(deck2.media.dir())) == 0
# client.sync()
# assert len(os.listdir(deck1.media.dir())) == 1
# assert len(os.listdir(deck2.media.dir())) == 1