# -*- coding: utf-8 -*- # Copyright: Damien Elmes # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html import zlib, re, urllib, urllib2, socket, simplejson, time, shutil import os, base64, sys, httplib, types from datetime import date import anki, anki.deck, anki.cards from anki.errors import * from anki.utils import ids2str, checksum, intTime from anki.consts import * from anki.lang import _ from hooks import runHook if simplejson.__version__ < "1.7.3": raise Exception("SimpleJSON must be 1.7.3 or later.") CHUNK_SIZE = 32768 MIME_BOUNDARY = "Anki-sync-boundary" SYNC_HOST = os.environ.get("SYNC_HOST") or "dev.ankiweb.net" SYNC_PORT = int(os.environ.get("SYNC_PORT") or 80) SYNC_URL = "http://%s:%d/sync/" % (SYNC_HOST, SYNC_PORT) KEYS = ("models", "facts", "cards", "media") # todo: # - ensure all urllib references are converted to urllib2 for proxies # - ability to cancel # - need to make sure syncing doesn't bump the deck modified time if nothing was # changed, since by default closing the deck bumps the mod time # - syncing with #/&/etc in password # - timeout on all requests (issue 2625) # - ditch user/pass in favour of session key? # full sync: # - compress and divide into pieces # - zlib? zip? content-encoding? if latter, need to account for bad proxies # that decompress. ########################################################################## from anki.consts import * class Syncer(object): MAX_REVLOG = 5000 MAX_CARDS = 5000 MAX_FACTS = 2500 def __init__(self, deck, server=None): self.deck = deck self.server = server def status(self, type): "Override to trace sync progress." #print "sync:", type pass def sync(self): "Returns 'noChanges', 'fullSync', or 'success'." # step 1: login & metadata self.rmod, rscm, self.maxUsn = self.server.times() self.lmod, lscm, self.minUsn = self.times() if self.lmod == self.rmod: return "noChanges" elif lscm != rscm: return "fullSync" self.lnewer = self.lmod > self.rmod # step 2: deletions and small objects lchg = self.changes() rchg = self.server.applyChanges( minUsn=self.minUsn, lnewer=self.lnewer, changes=lchg) self.mergeChanges(lchg, rchg) # step 3: stream large tables from server while 1: chunk = self.server.chunk() self.applyChunk(chunk) if chunk['done']: break # step 4: stream to server while 1: chunk = self.chunk() self.server.applyChunk(chunk) if chunk['done']: break # step 5: sanity check during beta testing c = self.sanityCheck() s = self.server.sanityCheck() assert c == s # finalize mod = self.server.finish() self.finish(mod) return "success" def times(self): return (self.deck.mod, self.deck.scm, self.deck._usn) def changes(self): "Bundle up deletions and small objects, and apply if server." d = dict(models=self.getModels(), groups=self.getGroups(), tags=self.getTags(), graves=self.getGraves()) if self.lnewer: d['conf'] = self.getConf() return d def applyChanges(self, minUsn, lnewer, changes): # we're the server; save info self.maxUsn = self.deck._usn self.minUsn = minUsn self.lnewer = not lnewer self.rchg = changes lchg = self.changes() # merge our side before returning self.mergeChanges(lchg, self.rchg) return lchg def mergeChanges(self, lchg, rchg): # first, handle the deletions self.mergeGraves(rchg['graves']) # then the other objects self.mergeModels(rchg['models']) self.mergeGroups(rchg['groups']) self.mergeTags(rchg['tags']) if 'conf' in rchg: self.mergeConf(rchg['conf']) self.prepareToChunk() def sanityCheck(self): # some basic checks to ensure the sync went ok. this is slow, so will # be removed before official release assert not self.deck.db.scalar(""" select count() from cards where fid not in (select id from facts)""") assert not self.deck.db.scalar(""" select count() from facts where id not in (select distinct fid from cards)""") for t in "cards", "facts", "revlog", "graves": assert not self.deck.db.scalar( "select count() from %s where usn = -1" % t) for g in self.deck.groups.all(): assert g['usn'] != -1 for t, usn in self.deck.tags.allItems(): assert usn != -1 for m in self.deck.models.all(): assert m['usn'] != -1 return [ self.deck.db.scalar("select count() from cards"), self.deck.db.scalar("select count() from facts"), self.deck.db.scalar("select count() from revlog"), self.deck.db.scalar("select count() from fsums"), self.deck.db.scalar("select count() from graves"), len(self.deck.models.all()), len(self.deck.tags.all()), len(self.deck.groups.all()), len(self.deck.groups.allConf()), ] def usnLim(self): if self.deck.server: return "usn >= %d" % self.minUsn else: return "usn = -1" def finish(self, mod=None): if not mod: # server side; we decide new mod time mod = intTime() self.deck.ls = mod self.deck._usn = self.maxUsn + 1 self.deck.save(mod=mod) return mod # Chunked syncing ########################################################################## def prepareToChunk(self): self.tablesLeft = ["revlog", "cards", "facts"] self.cursor = None def cursorForTable(self, table): lim = self.usnLim() x = self.deck.db.execute d = (self.maxUsn, lim) if table == "revlog": return x(""" select id, cid, %d, ease, ivl, lastIvl, factor, time, type from revlog where %s""" % d) elif table == "cards": return x(""" select id, fid, gid, ord, mod, %d, type, queue, due, ivl, factor, reps, lapses, left, edue, flags, data from cards where %s""" % d) else: return x(""" select id, guid, mid, gid, mod, %d, tags, flds, '', flags, data from facts where %s""" % d) def chunk(self): buf = dict(done=False) # gather up to 5000 records lim = 5000 while self.tablesLeft and lim: curTable = self.tablesLeft[0] if not self.cursor: self.cursor = self.cursorForTable(curTable) rows = self.cursor.fetchmany(lim) if len(rows) != lim: # table is empty self.tablesLeft.pop(0) self.cursor = None # if we're the client, mark the objects as having been sent if not self.deck.server: self.deck.db.execute( "update %s set usn=? where usn=-1"%curTable, self.maxUsn) buf[curTable] = rows lim -= len(buf) if not self.tablesLeft: buf['done'] = True return buf def applyChunk(self, chunk): if "revlog" in chunk: self.mergeRevlog(chunk['revlog']) if "cards" in chunk: self.mergeCards(chunk['cards']) if "facts" in chunk: self.mergeFacts(chunk['facts']) # Deletions ########################################################################## def getGraves(self): cards = [] facts = [] groups = [] if self.deck.server: curs = self.deck.db.execute( "select oid, type from graves where usn >= ?", self.minUsn) else: curs = self.deck.db.execute( "select oid, type from graves where usn = -1") for oid, type in curs: if type == REM_CARD: cards.append(oid) elif type == REM_FACT: facts.append(oid) else: groups.append(oid) if not self.deck.server: self.deck.db.execute("update graves set usn=? where usn=-1", self.maxUsn) return dict(cards=cards, facts=facts, groups=groups) def mergeGraves(self, graves): # facts first, so we don't end up with duplicate graves self.deck._remFacts(graves['facts']) self.deck.remCards(graves['cards']) for oid in graves['groups']: self.deck.groups.rem(oid) # Models ########################################################################## def getModels(self): if self.deck.server: return [m for m in self.deck.models.all() if m['usn'] >= self.minUsn] else: mods = [m for m in self.deck.models.all() if m['usn'] == -1] for m in mods: m['usn'] = self.maxUsn self.deck.models.save() return mods def mergeModels(self, rchg): for r in rchg: l = self.deck.models.get(r['id']) # if missing locally or server is newer, update if not l or r['mod'] > l['mod']: self.deck.models.update(r) # Groups ########################################################################## def getGroups(self): if self.deck.server: return [ [g for g in self.deck.groups.all() if g['usn'] >= self.minUsn], [g for g in self.deck.groups.allConf() if g['usn'] >= self.minUsn] ] else: groups = [g for g in self.deck.groups.all() if g['usn'] == -1] for g in groups: g['usn'] = self.maxUsn gconf = [g for g in self.deck.groups.allConf() if g['usn'] == -1] for g in gconf: g['usn'] = self.maxUsn self.deck.groups.save() return [groups, gconf] def mergeGroups(self, rchg): for r in rchg[0]: l = self.deck.groups.get(r['id'], False) # if missing locally or server is newer, update if not l or r['mod'] > l['mod']: self.deck.groups.update(r) for r in rchg[1]: l = self.deck.groups.conf(r['id']) # if missing locally or server is newer, update if not l or r['mod'] > l['mod']: self.deck.groups.updateConf(r) # Tags ########################################################################## def getTags(self): if self.deck.server: return [t for t, usn in self.deck.tags.allItems() if usn >= self.minUsn] else: tags = [] for t, usn in self.deck.tags.allItems(): if usn == -1: self.deck.tags.tags[t] = self.maxUsn tags.append(t) self.deck.tags.save() return tags def mergeTags(self, tags): self.deck.tags.register(tags, usn=self.maxUsn) # Cards/facts/revlog ########################################################################## def mergeRevlog(self, logs): self.deck.db.executemany( "insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)", logs) def newerRows(self, data, table, modIdx): ids = (r[0] for r in data) lmods = {} for id, mod in self.deck.db.execute( "select id, mod from %s where id in %s and %s" % ( table, ids2str(ids), self.usnLim())): lmods[id] = mod update = [] for r in data: if r[0] not in lmods or lmods[r[0]] < r[modIdx]: update.append(r) return update def mergeCards(self, cards): self.deck.db.executemany( "insert or replace into cards values " "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", self.newerRows(cards, "cards", 4)) def mergeFacts(self, facts): rows = self.newerRows(facts, "facts", 4) self.deck.db.executemany( "insert or replace into facts values (?,?,?,?,?,?,?,?,?,?,?)", rows) self.deck.updateFieldCache([f[0] for f in rows]) # Deck config ########################################################################## def getConf(self): return self.deck.conf def mergeConf(self, conf): self.deck.conf = conf class LocalServer(Syncer): # serialize/deserialize payload, so we don't end up sharing objects # between decks in testing def applyChanges(self, minUsn, lnewer, changes): l = simplejson.loads; d = simplejson.dumps return l(d(Syncer.applyChanges(self, minUsn, lnewer, l(d(changes))))) # not yet ported class RemoteServer(Syncer): pass # def unstuff(self, data): # return simplejson.loads(unicode(zlib.decompress(data), "utf8")) # def stuff(self, data): # return zlib.compress(simplejson.dumps(data)) # HTTP proxy: act as a server and direct requests to the real server ########################################################################## # not yet ported class HttpSyncServerProxy(object): def __init__(self, user, passwd): SyncServer.__init__(self) self.decks = None self.deckName = None self.username = user self.password = passwd self.protocolVersion = 5 self.sourcesToCheck = [] def connect(self, clientVersion=""): "Check auth, protocol & grab deck list." if not self.decks: import socket socket.setdefaulttimeout(30) d = self.runCmd("getDecks", libanki=anki.version, client=clientVersion, sources=simplejson.dumps(self.sourcesToCheck), pversion=self.protocolVersion) socket.setdefaulttimeout(None) if d['status'] != "OK": raise SyncError(type="authFailed", status=d['status']) self.decks = d['decks'] self.timestamp = d['timestamp'] self.timediff = abs(self.timestamp - time.time()) def hasDeck(self, deckName): self.connect() return deckName in self.decks.keys() def availableDecks(self): self.connect() return self.decks.keys() def createDeck(self, deckName): ret = self.runCmd("createDeck", name=deckName.encode("utf-8")) if not ret or ret['status'] != "OK": raise SyncError(type="createFailed") self.decks[deckName] = [0, 0] def summary(self, lastSync): return self.runCmd("summary", lastSync=self.stuff(lastSync)) def genOneWayPayload(self, lastSync): return self.runCmd("genOneWayPayload", lastSync=self.stuff(lastSync)) def modified(self): self.connect() return self.decks[self.deckName][0] def _lastSync(self): self.connect() return self.decks[self.deckName][1] def applyPayload(self, payload): return self.runCmd("applyPayload", payload=self.stuff(payload)) def finish(self): assert self.runCmd("finish") == "OK" def runCmd(self, action, **args): data = {"p": self.password, "u": self.username, "v": 2} if self.deckName: data['d'] = self.deckName.encode("utf-8") else: data['d'] = None data.update(args) data = urllib.urlencode(data) try: f = urllib2.urlopen(SYNC_URL + action, data) except (urllib2.URLError, socket.error, socket.timeout, httplib.BadStatusLine), e: raise SyncError(type="connectionError", exc=`e`) ret = f.read() if not ret: raise SyncError(type="noResponse") try: return self.unstuff(ret) except Exception, e: raise SyncError(type="connectionError", exc=`e`) # Full syncing ########################################################################## # not yet ported class FullSyncer(object): def __init__(self, deck): self.deck = deck def prepareFullSync(self): t = time.time() # ensure modified is not greater than server time self.deck.modified = min(self.deck.modified, self.server.timestamp) self.deck.db.commit() self.deck.close() fields = { "p": self.server.password, "u": self.server.username, "d": self.server.deckName.encode("utf-8"), } if self.localTime > self.remoteTime: return ("fromLocal", fields, self.deck.path) else: return ("fromServer", fields, self.deck.path) def fullSync(self): ret = self.prepareFullSync() if ret[0] == "fromLocal": self.fullSyncFromLocal(ret[1], ret[2]) else: self.fullSyncFromServer(ret[1], ret[2]) def fullSyncFromLocal(self, fields, path): global sendProgressHook try: # write into a temporary file, since POST needs content-length src = open(path, "rb") name = namedtmp("fullsync.anki") tmp = open(name, "wb") # post vars for (key, value) in fields.items(): tmp.write('--' + MIME_BOUNDARY + "\r\n") tmp.write('Content-Disposition: form-data; name="%s"\r\n' % key) tmp.write('\r\n') tmp.write(value) tmp.write('\r\n') # file header tmp.write('--' + MIME_BOUNDARY + "\r\n") tmp.write( 'Content-Disposition: form-data; name="deck"; filename="deck"\r\n') tmp.write('Content-Type: application/octet-stream\r\n') tmp.write('\r\n') # data comp = zlib.compressobj() while 1: data = src.read(CHUNK_SIZE) if not data: tmp.write(comp.flush()) break tmp.write(comp.compress(data)) src.close() tmp.write('\r\n--' + MIME_BOUNDARY + '--\r\n\r\n') size = tmp.tell() tmp.seek(0) # open http connection runHook("fullSyncStarted", size) headers = { 'Content-type': 'multipart/form-data; boundary=%s' % MIME_BOUNDARY, 'Content-length': str(size), 'Host': SYNC_HOST, } req = urllib2.Request(SYNC_URL + "fullup?v=2", tmp, headers) try: sendProgressHook = fullSyncProgressHook res = urllib2.urlopen(req).read() assert res.startswith("OK") # update lastSync c = sqlite.connect(path) c.execute("update decks set lastSync = ?", (res[3:],)) c.commit() c.close() finally: sendProgressHook = None tmp.close() finally: runHook("fullSyncFinished") def fullSyncFromServer(self, fields, path): try: runHook("fullSyncStarted", 0) fields = urllib.urlencode(fields) src = urllib.urlopen(SYNC_URL + "fulldown", fields) tmpname = namedtmp("fullsync.anki") tmp = open(tmpname, "wb") decomp = zlib.decompressobj() cnt = 0 while 1: data = src.read(CHUNK_SIZE) if not data: tmp.write(decomp.flush()) break tmp.write(decomp.decompress(data)) cnt += CHUNK_SIZE runHook("fullSyncProgress", "fromServer", cnt) src.close() tmp.close() os.close(fd) # if we were successful, overwrite old deck os.unlink(path) os.rename(tmpname, path) # reset the deck name c = sqlite.connect(path) c.execute("update decks set syncName = ?", [checksum(path.encode("utf-8"))]) c.commit() c.close() finally: runHook("fullSyncFinished") ########################################################################## # Monkey-patch httplib to incrementally send instead of chewing up large # amounts of memory, and track progress. sendProgressHook = None def incrementalSend(self, strOrFile): if self.sock is None: if self.auto_open: self.connect() else: raise NotConnected() if self.debuglevel > 0: print "send:", repr(str) try: if (isinstance(strOrFile, str) or isinstance(strOrFile, unicode)): self.sock.sendall(strOrFile) else: cnt = 0 t = time.time() while 1: if sendProgressHook and time.time() - t > 1: sendProgressHook(cnt) t = time.time() data = strOrFile.read(CHUNK_SIZE) cnt += len(data) if not data: break self.sock.sendall(data) except socket.error, v: if v[0] == 32: # Broken pipe self.close() raise httplib.HTTPConnection.send = incrementalSend def fullSyncProgressHook(cnt): runHook("fullSyncProgress", "fromLocal", cnt)