Anki/anki/sync.py
Damien Elmes b5c0b1f2c7 drop required/unique field properties
Instead of having required and unique flags for every field, enforce both
requirements on the first field, and neither on the rest. This mirrors the
subject/body format people are used to in note-taking apps. The subject
defines the object being learnt, and the remaining fields represent properties
of that object.

In the past, duplicate checking served two purposes: it quickly notified the
user that they're entering the same fact twice, and it notified the user if
they'd accidentally mistyped a secondary field. The former behaviour is
important for avoiding wasted effort, and so it should be done in real time.
The latter behaviour is not essential however - a typo is not wasted effort,
and it could be fixed in a periodic 'find duplicates' function. Given that
some users ended up with sluggish decks due to the overhead caused by a large
number of facts * a large number of unique fields, this seems like a change
for the better.

This also means Anki will let you add notes as long as the first field has
been filled out. Again, this is not a big deal: Anki is still checking to make
sure one or more cards will be generated, and the user can easily add any
missing fields later.

As a bonus, this change simplifies field configuration somewhat. As the card
layout and field dialogs are a popular point of confusion, the more they can
be simplified, the better.
2011-11-24 22:16:03 +09:00

618 lines
21 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import urllib, simplejson, os, sys, httplib2, gzip
from cStringIO import StringIO
from datetime import date
from anki.db import DB
from anki.errors import *
from anki.utils import ids2str, checksum, intTime
from anki.consts import *
from anki.lang import _
from hooks import runHook

if simplejson.__version__ < "1.7.3":
    raise Exception("SimpleJSON must be 1.7.3 or later.")

# - 64 bit guid will be munged in js; need to escape or rethink
# - make sure /sync/download is compressed
# - status() should be using the hooks instead

# todo:
# - ensure all urllib references are converted to urllib2 for proxies
# - ability to cancel
# - need to make sure syncing doesn't bump the col modified time if nothing was
#   changed, since by default closing the col bumps the mod time
# - ensure the user doesn't add foreign chars to password

# Incremental syncing
##########################################################################
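
# Syncer runs on both ends of a sync: the client drives the protocol in
# sync() below, while the server side answers via applyChanges()/applyChunk()
# and friends; self.col.server distinguishes the two roles.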

class Syncer(object):

    def __init__(self, col, server=None):
        self.col = col
        self.server = server

    def status(self, type):
        "Override to trace sync progress."
        #print "sync:", type
        pass

    def sync(self):
        "Returns 'noChanges', 'fullSync', 'clockOff' or 'success'."
        # step 1: login & metadata
        self.status("login")
        self.rmod, rscm, self.maxUsn, rts, self.mediaUsn = self.server.meta()
        self.lmod, lscm, self.minUsn, lts, dummy = self.meta()
        if abs(rts - lts) > 300:
            return "clockOff"
        if self.lmod == self.rmod:
            return "noChanges"
        elif lscm != rscm:
            return "fullSync"
        self.lnewer = self.lmod > self.rmod
        # step 2: deletions and small objects
        self.status("meta")
        lchg = self.changes()
        rchg = self.server.applyChanges(
            minUsn=self.minUsn, lnewer=self.lnewer, changes=lchg)
        self.mergeChanges(lchg, rchg)
        # step 3: stream large tables from server
        self.status("server")
        while 1:
            self.status("stream")
            chunk = self.server.chunk()
            self.applyChunk(chunk=chunk)
            if chunk['done']:
                break
        # step 4: stream to server
        self.status("client")
        while 1:
            self.status("stream")
            chunk = self.chunk()
            self.server.applyChunk(chunk=chunk)
            if chunk['done']:
                break
        # step 5: sanity check during beta testing
        self.status("sanity")
        c = self.sanityCheck()
        s = self.server.sanityCheck()
        assert c == s
        # finalize
        self.status("finalize")
        mod = self.server.finish()
        self.finish(mod)
        return "success"

    def meta(self):
        return (self.col.mod, self.col.scm, self.col._usn, intTime(), None)
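
    # the tuple above is (collection mod time, schema mod time, usn, current
    # time, media usn); sync() unpacks the same shape from server.meta()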

    def changes(self):
        "Bundle up deletions and small objects, and apply if server."
        d = dict(models=self.getModels(),
                 decks=self.getDecks(),
                 tags=self.getTags(),
                 graves=self.getGraves())
        if self.lnewer:
            d['conf'] = self.getConf()
        return d

    def applyChanges(self, minUsn, lnewer, changes):
        # we're the server; save info
        self.maxUsn = self.col._usn
        self.minUsn = minUsn
        self.lnewer = not lnewer
        self.rchg = changes
        lchg = self.changes()
        # merge our side before returning
        self.mergeChanges(lchg, self.rchg)
        return lchg

    def mergeChanges(self, lchg, rchg):
        # first, handle the deletions
        self.mergeGraves(rchg['graves'])
        # then the other objects
        self.mergeModels(rchg['models'])
        self.mergeDecks(rchg['decks'])
        self.mergeTags(rchg['tags'])
        if 'conf' in rchg:
            self.mergeConf(rchg['conf'])
        self.prepareToChunk()

    def sanityCheck(self):
        # some basic checks to ensure the sync went ok. this is slow, so will
        # be removed before official release
        assert not self.col.db.scalar("""
select count() from cards where nid not in (select id from notes)""")
        assert not self.col.db.scalar("""
select count() from notes where id not in (select distinct nid from cards)""")
        for t in "cards", "notes", "revlog", "graves":
            assert not self.col.db.scalar(
                "select count() from %s where usn = -1" % t)
        for g in self.col.decks.all():
            assert g['usn'] != -1
        for t, usn in self.col.tags.allItems():
            assert usn != -1
        for m in self.col.models.all():
            assert m['usn'] != -1
        return [
            self.col.db.scalar("select count() from cards"),
            self.col.db.scalar("select count() from notes"),
            self.col.db.scalar("select count() from revlog"),
            self.col.db.scalar("select count() from graves"),
            len(self.col.models.all()),
            len(self.col.tags.all()),
            len(self.col.decks.all()),
            len(self.col.decks.allConf()),
        ]

    def usnLim(self):
        if self.col.server:
            return "usn >= %d" % self.minUsn
        else:
            return "usn = -1"

    def finish(self, mod=None):
        if not mod:
            # server side; we decide new mod time
            mod = intTime(1000)
        self.col.ls = mod
        self.col._usn = self.maxUsn + 1
        self.col.save(mod=mod)
        return mod

    # Chunked syncing
    ##########################################################################
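
    # The large tables (revlog, cards, notes) are streamed in fixed order, up
    # to 5000 rows at a time; a single chunk may span the end of one table and
    # the start of the next, and done=True marks the final chunk.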

    def prepareToChunk(self):
        self.tablesLeft = ["revlog", "cards", "notes"]
        self.cursor = None

    def cursorForTable(self, table):
        lim = self.usnLim()
        x = self.col.db.execute
        d = (self.maxUsn, lim)
        if table == "revlog":
            return x("""
select id, cid, %d, ease, ivl, lastIvl, factor, time, type
from revlog where %s""" % d)
        elif table == "cards":
            return x("""
select id, nid, did, ord, mod, %d, type, queue, due, ivl, factor, reps,
lapses, left, edue, flags, data from cards where %s""" % d)
        else:
            return x("""
select id, guid, mid, did, mod, %d, tags, flds, '', '', flags, data
from notes where %s""" % d)

    def chunk(self):
        buf = dict(done=False)
        # gather up to 5000 records
        lim = 5000
        while self.tablesLeft and lim:
            curTable = self.tablesLeft[0]
            if not self.cursor:
                self.cursor = self.cursorForTable(curTable)
            rows = self.cursor.fetchmany(lim)
            fetched = len(rows)
            if fetched != lim:
                # this table is exhausted; move on to the next one
                self.tablesLeft.pop(0)
                self.cursor = None
                # if we're the client, mark the objects as having been sent
                if not self.col.server:
                    self.col.db.execute(
                        "update %s set usn=? where usn=-1" % curTable,
                        self.maxUsn)
            buf[curTable] = rows
            lim -= fetched
        if not self.tablesLeft:
            buf['done'] = True
        return buf

    def applyChunk(self, chunk):
        if "revlog" in chunk:
            self.mergeRevlog(chunk['revlog'])
        if "cards" in chunk:
            self.mergeCards(chunk['cards'])
        if "notes" in chunk:
            self.mergeNotes(chunk['notes'])

    # Deletions
    ##########################################################################
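
    # Deletions are recorded in the graves table as (oid, type) tombstones,
    # so they can be replayed on the other side.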

    def getGraves(self):
        cards = []
        notes = []
        decks = []
        if self.col.server:
            curs = self.col.db.execute(
                "select oid, type from graves where usn >= ?", self.minUsn)
        else:
            curs = self.col.db.execute(
                "select oid, type from graves where usn = -1")
        for oid, type in curs:
            if type == REM_CARD:
                cards.append(oid)
            elif type == REM_NOTE:
                notes.append(oid)
            else:
                decks.append(oid)
        if not self.col.server:
            self.col.db.execute("update graves set usn=? where usn=-1",
                                self.maxUsn)
        return dict(cards=cards, notes=notes, decks=decks)

    def mergeGraves(self, graves):
        # notes first, so we don't end up with duplicate graves
        self.col._remNotes(graves['notes'])
        self.col.remCards(graves['cards'])
        for oid in graves['decks']:
            self.col.decks.rem(oid)

    # Models
    ##########################################################################

    def getModels(self):
        if self.col.server:
            return [m for m in self.col.models.all() if m['usn'] >= self.minUsn]
        else:
            mods = [m for m in self.col.models.all() if m['usn'] == -1]
            for m in mods:
                m['usn'] = self.maxUsn
            self.col.models.save()
            return mods

    def mergeModels(self, rchg):
        for r in rchg:
            l = self.col.models.get(r['id'])
            # if missing locally or server is newer, update
            if not l or r['mod'] > l['mod']:
                self.col.models.update(r)

    # Decks
    ##########################################################################

    def getDecks(self):
        if self.col.server:
            return [
                [g for g in self.col.decks.all() if g['usn'] >= self.minUsn],
                [g for g in self.col.decks.allConf() if g['usn'] >= self.minUsn]
            ]
        else:
            decks = [g for g in self.col.decks.all() if g['usn'] == -1]
            for g in decks:
                g['usn'] = self.maxUsn
            dconf = [g for g in self.col.decks.allConf() if g['usn'] == -1]
            for g in dconf:
                g['usn'] = self.maxUsn
            self.col.decks.save()
            return [decks, dconf]

    def mergeDecks(self, rchg):
        for r in rchg[0]:
            l = self.col.decks.get(r['id'], False)
            # if missing locally or server is newer, update
            if not l or r['mod'] > l['mod']:
                self.col.decks.update(r)
        for r in rchg[1]:
            l = self.col.decks.conf(r['id'])
            # if missing locally or server is newer, update
            if not l or r['mod'] > l['mod']:
                self.col.decks.updateConf(r)

    # Tags
    ##########################################################################

    def getTags(self):
        if self.col.server:
            return [t for t, usn in self.col.tags.allItems()
                    if usn >= self.minUsn]
        else:
            tags = []
            for t, usn in self.col.tags.allItems():
                if usn == -1:
                    self.col.tags.tags[t] = self.maxUsn
                    tags.append(t)
            self.col.tags.save()
            return tags

    def mergeTags(self, tags):
        self.col.tags.register(tags, usn=self.maxUsn)

    # Cards/notes/revlog
    ##########################################################################

    def mergeRevlog(self, logs):
        self.col.db.executemany(
            "insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)",
            logs)

    def newerRows(self, data, table, modIdx):
        ids = (r[0] for r in data)
        lmods = {}
        for id, mod in self.col.db.execute(
                "select id, mod from %s where id in %s and %s" % (
                    table, ids2str(ids), self.usnLim())):
            lmods[id] = mod
        update = []
        for r in data:
            if r[0] not in lmods or lmods[r[0]] < r[modIdx]:
                update.append(r)
        return update
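
    # an incoming row always wins unless the local copy has also changed this
    # sync (usnLim), in which case the row with the newer mod time wins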

    def mergeCards(self, cards):
        self.col.db.executemany(
            "insert or replace into cards values "
            "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
            self.newerRows(cards, "cards", 4))

    def mergeNotes(self, notes):
        rows = self.newerRows(notes, "notes", 4)
        self.col.db.executemany(
            "insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?,?)",
            rows)
        self.col.updateFieldCache([f[0] for f in rows])

    # Col config
    ##########################################################################

    def getConf(self):
        return self.col.conf

    def mergeConf(self, conf):
        self.col.conf = conf

# Local syncing for unit tests
##########################################################################

class LocalServer(Syncer):

    # serialize/deserialize payload, so we don't end up sharing objects
    # between cols
    def applyChanges(self, minUsn, lnewer, changes):
        l = simplejson.loads; d = simplejson.dumps
        return l(d(Syncer.applyChanges(self, minUsn, lnewer, l(d(changes)))))
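
# A minimal usage sketch (hypothetical collections c1 and c2, as in the unit
# tests): the client syncs against an in-process server.
#
#   server = LocalServer(c2)
#   client = Syncer(c1, server)
#   assert client.sync() in ("noChanges", "success")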

# HTTP syncing tools
##########################################################################

class HttpSyncer(object):

    # retrieving a host key for future operations
    def hostKey(self, pw):
        h = httplib2.Http(timeout=60)
        resp, cont = h.request(
            SYNC_URL+"hostKey?" + urllib.urlencode(dict(u=self.user, p=pw)))
        if resp['status'] != '200':
            raise Exception("Invalid response code: %s" % resp['status'])
        self.hkey = simplejson.loads(cont)['key']
        return self.hkey

    def _vars(self):
        return dict(k=self.hkey)

    # Posting data as a file
    ######################################################################
    # We don't want to post the payload as a form var, as the percent-encoding
    # is costly. We could send it as a raw post, but more HTTP clients seem to
    # support file uploading, so this is the more compatible choice.

    def postData(self, http, method, fobj, vars, comp=1):
        bdry = "--"+MIME_BOUNDARY
        # write out post vars, including session key and compression flag
        buf = StringIO()
        vars = vars or {}
        vars['c'] = 1 if comp else 0
        for (key, value) in vars.items():
            buf.write(bdry + "\r\n")
            buf.write(
                'Content-Disposition: form-data; name="%s"\r\n\r\n%s\r\n' %
                (key, value))
        # file header
        if fobj:
            buf.write(bdry + "\r\n")
            buf.write("""\
Content-Disposition: form-data; name="data"; filename="data"\r\n\
Content-Type: application/octet-stream\r\n\r\n""")
            # write file into buffer, optionally compressing
            if comp:
                tgt = gzip.GzipFile(mode="wb", fileobj=buf, compresslevel=comp)
            else:
                tgt = buf
            while 1:
                data = fobj.read(CHUNK_SIZE)
                if not data:
                    if comp:
                        tgt.close()
                    break
                tgt.write(data)
        buf.write('\r\n' + bdry + '--\r\n')
        size = buf.tell()
        # connection headers
        headers = {
            'Content-Type': 'multipart/form-data; boundary=%s' % MIME_BOUNDARY,
            'Content-Length': str(size),
        }
        body = buf.getvalue()
        buf.close()
        resp, cont = http.request(
            SYNC_URL+method, "POST", headers=headers, body=body)
        if resp['status'] != '200':
            raise Exception("Invalid response code: %s" % resp['status'])
        return cont

# Incremental sync over HTTP
######################################################################
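
# Each overridden method below serializes its keyword args as JSON and POSTs
# them to the matching endpoint under SYNC_URL; see _run().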

class RemoteServer(Syncer, HttpSyncer):

    def __init__(self, user, hkey):
        self.user = user
        self.hkey = hkey
        self.con = None

    def meta(self):
        h = httplib2.Http(timeout=60)
        resp, cont = h.request(
            SYNC_URL+"meta?" + urllib.urlencode(dict(u=self.user, v=SYNC_VER)))
        # fixme: convert these into easily-catchable errors
        if resp['status'] in ('503', '504'):
            raise Exception("Server is too busy; please try again later.")
        elif resp['status'] == '501':
            raise Exception("Your client is out of date; please upgrade.")
        elif resp['status'] == '403':
            raise Exception("Invalid key; please authenticate.")
        elif resp['status'] != '200':
            raise Exception("Invalid response code: %s" % resp['status'])
        return simplejson.loads(cont)

    def applyChanges(self, **kw):
        self.con = httplib2.Http(timeout=60)
        return self._run("applyChanges", kw)

    def chunk(self, **kw):
        return self._run("chunk", kw)

    def applyChunk(self, **kw):
        return self._run("applyChunk", kw)

    def sanityCheck(self, **kw):
        return self._run("sanityCheck", kw)

    def finish(self, **kw):
        return self._run("finish", kw)

    def _run(self, cmd, data):
        return simplejson.loads(
            self.postData(self.con, cmd, StringIO(simplejson.dumps(data)),
                          self._vars()))

# Full syncing
##########################################################################

class FullSyncer(HttpSyncer):

    def __init__(self, col, hkey):
        self.col = col
        self.hkey = hkey

    def _con(self):
        return httplib2.Http(timeout=60)

    def download(self):
        self.col.close()
        resp, cont = self._con().request(
            SYNC_URL+"download?" + urllib.urlencode(self._vars()))
        if resp['status'] != '200':
            raise Exception("Invalid response code: %s" % resp['status'])
        tpath = self.col.path + ".tmp"
        open(tpath, "wb").write(cont)
        os.unlink(self.col.path)
        os.rename(tpath, self.col.path)
        d = DB(self.col.path)
        assert d.scalar("pragma integrity_check") == "ok"
        self.col = None

    def upload(self):
        self.col.beforeUpload()
        assert self.postData(self._con(), "upload", open(self.col.path, "rb"),
                             self._vars(), comp=6) == "OK"

# Media syncing
##########################################################################
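
# Media is exchanged as zip archives of added files; each side keeps asking
# for more until the reply is no longer "continue" (the server's final reply
# is the new media usn instead).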

class MediaSyncer(object):

    def __init__(self, col, server=None):
        self.col = col
        self.server = server
        self.added = None

    def sync(self, mediaUsn):
        # step 1: check if there have been any changes
        self.col.media.findChanges()
        lusn = self.col.media.usn()
        if lusn == mediaUsn and not self.col.media.hasChanged():
            return "noChanges"
        # step 2: send/recv deletions
        runHook("mediaSync", "remove")
        lrem = self.removed()
        rrem = self.server.remove(fnames=lrem, minUsn=lusn)
        self.remove(rrem)
        # step 3: stream files from server
        runHook("mediaSync", "server")
        while 1:
            runHook("mediaSync", "stream")
            zip = self.server.files()
            if self.addFiles(zip=zip) != "continue":
                break
        # step 4: stream files to the server
        runHook("mediaSync", "client")
        while 1:
            runHook("mediaSync", "stream")
            zip = self.files()
            usn = self.server.addFiles(zip=zip)
            if usn != "continue":
                # when server has run out of files, it returns bumped usn
                break
        # step 5: finalize
        self.col.media.setUsn(usn)
        self.col.media.clearLog()
        # clear cursor so successive calls work
        self.added = None
        return "success"

    def removed(self):
        return self.col.media.removed()

    def remove(self, fnames, minUsn=None):
        self.col.media.syncRemove(fnames)
        if minUsn is not None:
            # we're the server
            self.minUsn = minUsn
            return self.col.media.removed()

    def files(self):
        if not self.added:
            self.added = self.col.media.added()
        return self.col.media.zipFromAdded(self.added)

    def addFiles(self, zip):
        "True if zip is the last in set. Server returns new usn instead."
        return self.col.media.syncAdd(zip)

# Remote media syncing
##########################################################################

class RemoteMediaServer(MediaSyncer, HttpSyncer):

    def __init__(self, hkey):
        self.hkey = hkey
        self.con = httplib2.Http(timeout=60)

    def remove(self, **kw):
        return simplejson.loads(
            self.postData(
                self.con, "remove", StringIO(simplejson.dumps(kw)),
                self._vars()))

    def files(self):
        return self.postData(self.con, "files", None, self._vars())

    def addFiles(self, zip):
        return simplejson.loads(
            self.postData(self.con, "addFiles", StringIO(zip),
                          self._vars(), comp=0))

    # only for unit tests
    def mediatest(self, n):
        return simplejson.loads(
            self.postData(self.con, "mediatest", StringIO(
                simplejson.dumps(dict(n=n))), self._vars()))