initial work on sync refactor

Ported the sync code to the latest libanki structure. Key points:

No summary:

The old style had each side fetch ids and mod times, and required the client to
diff them and then request or bundle up the appropriate objects. Now each side
sends all of its changed objects, and it's the responsibility of the other side
to decide what needs to be merged and what needs to be discarded. This lets us
skip the separate summary step, which saves scanning the tables twice, and
reduces the number of server requests from 4 to 3.
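
In outline, the new exchange looks something like the sketch below. The method
names (changesSince, applyChanges, mergeChanges, finish) are illustrative only,
not the actual API:

    def sync(client, server, lastSync):
        # each side gathers everything it has modified since the last sync
        localChanges = client.changesSince(lastSync)
        # one request: push our changes, receive the server's in the reply
        remoteChanges = server.applyChanges(localChanges)
        # the receiver decides per object whether to merge or discard,
        # typically by comparing mod times
        client.mergeChanges(remoteChanges)
        # final request: agree on the new sync point
        server.finish()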

Schema changes:

Certain operations that are difficult to merge (such as changing the number of
fields in a model, or deleting models or groups) result in a full sync. The
user is warned in the GUI before such a schema-changing operation executes.
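
The check itself is cheap: schema-changing operations bump a schema
modification time (scm), and the two sides compare it before attempting an
incremental sync. A minimal sketch; the helper name is hypothetical, and only
the scm field and the "fullSync" result appear in the code below:

    def _checkSchema(localDeck, remoteDeck):
        # if the schema mod times disagree, incremental merging isn't
        # safe, so fall back to sending the whole deck
        if localDeck.scm != remoteDeck.scm:
            return "fullSync"
        return "ok"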

Sync size:

For now, we don't try to deal with large incremental syncs. Because the cards,
facts and revlog can be large in memory (hundreds of megabytes in some cases),
they would have to be chunked for the benefit of devices with limited memory.

Currently findChanges() uses the full fact/card objects which we're planning
to send to the server. It could be rewritten to fetch a summary (just the id,
mod & rep columns), which would save some memory, and then compare against
blocks of a few hundred remote objects at a time; a sketch follows the list
below. However, it's a bit more complicated than that:

- If the local summary is huge it could exceed memory limits. Without a local
  summary we'd have to query the db for each record, which could be a lot
  slower.

- We currently accumulate a list of remote records we need to add locally.
  This list also has the potential to get too big. We would need to
  periodically commit the changes as we accumulate them.

- Merging a large number of changes is also potentially slow on mobile
  devices.
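
To make the block-wise idea concrete, here is a rough sketch of a
summary-based merge. Everything in it is hypothetical: the helper name, the
block size, the row layout, and the assumption that comparing (id, mod) pairs
is enough to decide a merge:

    def mergeRemoteFacts(deck, remoteFacts, blockSize=200):
        # local summary: fact id -> mod time; much smaller than full rows
        localMod = dict(deck.db.all("select id, mod from facts"))
        pending = []
        for row in remoteFacts:
            # assume each row starts with (id, mod, ...)
            fid, mod = row[0], row[1]
            if fid not in localMod or mod > localMod[fid]:
                pending.append(row)
            if len(pending) >= blockSize:
                # commit periodically so the accumulated list stays small
                deck.db.executemany(
                    "insert or replace into facts values (?,?,?,?,?,?,?)",
                    pending)
                pending = []
        if pending:
            deck.db.executemany(
                "insert or replace into facts values (?,?,?,?,?,?,?)", pending)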

Given that certain schema-changing operations require a full sync anyway, I
think it's probably best to concentrate on a chunked full sync for now
instead: provided the user syncs periodically, the size limits that force a
full sync should rarely be hit except after bulk editing operations.
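
A chunked full sync could then stream each large table in fixed-size pieces,
roughly as below. The chunk size and the sendChunk/sendDone transport calls
are made up for the sketch:

    CHUNK_ROWS = 5000  # arbitrary; sized for low-memory devices

    def fullSyncUpload(deck, conn):
        # stream each large table in pieces instead of loading it whole
        for table in ("facts", "cards", "revlog"):
            offset = 0
            while True:
                rows = deck.db.all(
                    "select * from %s limit %d offset %d"
                    % (table, CHUNK_ROWS, offset))
                if not rows:
                    break
                conn.sendChunk(table, rows)
                offset += CHUNK_ROWS
        conn.sendDone()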

Chunked partial syncing should be possible to add in the future without any
changes to the deck format.

Still to do:
- deck conf merging
- full syncing
- new http proxy
Damien Elmes 2011-09-08 12:47:53 +09:00
parent 7034c1ed29
commit 362ae3eee2
9 changed files with 474 additions and 916 deletions


@@ -11,3 +11,7 @@ class AnkiError(Exception):
         if self.data:
             m += ": %s" % repr(self.data)
         return m
+
+
+class SyncTooLarge(Exception):
+    pass


@@ -38,8 +38,8 @@ select mid, gid, mod, tags, flds, data from facts where id = ?""", self.id)
         self._model = self.deck.models.get(self.mid)
         self._fmap = self.deck.models.fieldMap(self._model)
 
-    def flush(self):
-        self.mod = intTime()
+    def flush(self, mod=None):
+        self.mod = mod if mod else intTime()
         sfld = stripHTML(self.fields[self.deck.models.sortIdx(self._model)])
         tags = self.stringTags()
         res = self.deck.db.execute("""


@@ -72,9 +72,10 @@ class GroupManager(object):
         self.gconf = simplejson.loads(gconf)
         self.changed = False
 
-    def save(self, g):
+    def save(self, g=None):
         "Can be called with either a group or a group configuration."
-        g['mod'] = intTime()
+        if g:
+            g['mod'] = intTime()
         self.changed = True
 
     def flush(self):
@@ -126,6 +127,10 @@ class GroupManager(object):
         "A list of all groups."
         return self.groups.values()
 
+    def allConf(self):
+        "A list of all group config."
+        return self.gconf.values()
+
     def _ensureParents(self, name):
         path = name.split("::")
         s = ""
@@ -146,12 +151,24 @@ class GroupManager(object):
         return self.gconf[str(self.groups[str(gid)]['conf'])]
 
     def get(self, gid):
-        return self.groups[str(gid)]
+        id = str(gid)
+        if id in self.groups:
+            return self.groups[id]
 
     def setGroup(self, cids, gid):
         self.db.execute(
             "update cards set gid = ? where id in "+ids2str(cids), gid)
 
+    def update(self, g):
+        "Add or update an existing group. Used for syncing and merging."
+        self.groups[str(g['id'])] = g
+        # mark registry changed, but don't bump mod time
+        self.save()
+
+    def updateConf(self, g):
+        self.gconf[str(g['id'])] = g
+        self.save()
+
     # Group selection
     #############################################################


@@ -19,6 +19,9 @@ from anki.utils import fieldChecksum, ids2str
 from anki.errors import *
 
 #from anki.deck import NEW_CARDS_RANDOM
 
+# FIXME: when importing an anki file, if any revlog entries are less than the
+# last sync time, we need to bump the deck schema
+
 # Base importer
 ##########################################################################


@@ -4,7 +4,6 @@
 from anki import Deck
 from anki.importing import Importer
-from anki.sync import SyncClient, SyncServer, copyLocalMedia
 from anki.lang import _
 from anki.utils import ids2str
 
 #from anki.deck import NEW_CARDS_RANDOM


@@ -98,8 +98,10 @@ class ModelManager(object):
         self.deck.groups.top()['curModel'] = m['id']
 
     def get(self, id):
-        "Get model with ID."
-        return self.models[str(id)]
+        "Get model with ID, or None."
+        id = str(id)
+        if id in self.models:
+            return self.models[id]
 
     def all(self):
         "Get all models."
@@ -139,11 +141,16 @@ select id from cards where fid in (select id from facts where mid = ?)""",
 
     def _add(self, m):
         self._setID(m)
         self.models[m['id']] = m
-        self.save(m)
+        self.update(m)
         self.setCurrent(m)
         return m
 
+    def update(self, m):
+        "Add or update an existing model. Used for syncing and merging."
+        self.models[str(m['id'])] = m
+        # mark registry changed, but don't bump mod time
+        self.save()
+
     def _setID(self, m):
         while 1:
             id = str(intTime(1000))

File diff suppressed because it is too large.


@@ -57,6 +57,9 @@ class TagManager(object):
         self.register(set(self.split(
             " ".join(self.deck.db.list("select distinct tags from facts"+lim)))))
 
+    def allSince(self, mod):
+        return [k for k,v in self.tags.items() if v > mod]
+
     # Bulk addition/removal from facts
     #############################################################


@@ -6,8 +6,7 @@ from tests.shared import assertException
 from anki.errors import *
 from anki import Deck
 from anki.utils import intTime
-from anki.sync import SyncClient, SyncServer, HttpSyncServer, HttpSyncServerProxy
-from anki.sync import copyLocalMedia
+from anki.sync import Syncer, LocalServer
 from anki.facts import Fact
 from anki.cards import Card
 from tests.shared import getEmptyDeck
@@ -22,306 +21,108 @@ deck2=None
 client=None
 server=None
 
-def setup_local(loadDecks=None):
+def setup_basic(loadDecks=None):
     global deck1, deck2, client, server
     if loadDecks:
         deck1 = Deck(loadDecks[0], backup=False)
         deck2 = Deck(loadDecks[1], backup=False)
     else:
         deck1 = getEmptyDeck()
         # add a fact to deck 1
         f = deck1.newFact()
         f['Front'] = u"foo"; f['Back'] = u"bar"; f.tags = [u"foo"]
         deck1.addFact(f)
-        deck1.syncName = "abc"
+        # answer it
+        deck1.reset(); deck1.sched.answerCard(deck1.sched.getCard(), 4)
+        # repeat for deck2; sleep a tick so we have different ids
         deck2 = getEmptyDeck()
         f = deck2.newFact()
-        f['Front'] = u"foo"; f['Back'] = u"bar"; f.tags = [u"foo"]
+        f['Front'] = u"bar"; f['Back'] = u"bar"; f.tags = [u"bar"]
         deck2.addFact(f)
-        deck2.syncName = "abc"
-        deck1.lastSync = deck2.lastSync = intTime()
+        deck2.reset(); deck2.sched.answerCard(deck2.sched.getCard(), 4)
+        # start with same schema and sync time
+        deck1.lastSync = deck2.lastSync = intTime() - 1
+        deck1.scm = deck2.scm = 0
         time.sleep(1)
-        # now add another fact to deck1 that hasn't been synced yet
-        f = deck1.newFact()
-        f['Front'] = u"bar"; f['Back'] = u"baz"
-        deck1.addFact(f)
-        # and another to deck2
-        f = deck2.newFact()
-        f['Front'] = u"qux"; f['Back'] = u"baz"
-        deck2.addFact(f)
-        deck2.reset()
-        c = deck2.sched.getCard()
-        deck2.sched.answerCard(c, 3)
-        # change deck1's model
-        deck1.currentModel().flush()
+        # and same mod time, so sync does nothing
         deck1.save(); deck2.save()
-    client = SyncClient(deck1)
-    server = SyncServer(deck2)
-    print "deck1", client.deck.db.all("select * from facts")
-    print "deck2", server.deck.db.all("select * from facts")
-    client.setServer(server)
+    server = LocalServer(deck2)
+    client = Syncer(deck1, server)
 
 def teardown():
     pass
 
+def setup_modified():
+    setup_basic()
+    # mark deck1 as changed
+    deck1.save(mod=intTime()+1)
+
-@nose.with_setup(setup_local, teardown)
-def _test_changes():
-    deck2.scm = 0
-    dels = client.deletions(deck1.lastSync)
-    rem = server.changes(deck1.lastSync, dels)
-    client.delete(rem['deletions'])
-    assert rem
-    client.rewriteIds(rem)
-    loc = client.changes(deck1.lastSync)
-    assert loc
-    l, r = client.diff(loc, rem, "facts", 3)
-    # local id is larger
-    assert l[0][0] == 3
-    assert r[0][0] == 2
+@nose.with_setup(setup_basic)
+def test_nochange():
+    assert client.sync() == "noChanges"
 
-    keys = ("models", "groups", "gconf", "facts", "cards")
-    keys2 = ("revlog", "tags")
+@nose.with_setup(setup_modified)
+def test_changedSchema():
+    deck1.scm += 1
+    assert client.sync() == "fullSync"
 
-    proc = {}
-    resp = {}
-    for type in keys:
-        l, r = getattr(client, 'diff'+type.capitalize())(loc, rem)
-        proc[type] = r
-        resp[type] = l
-    for type in keys2:
-        l = loc[type]; r = rem[type]
-        proc[type] = r
-        resp[type] = l
+@nose.with_setup(setup_modified)
+def test_sync():
+    def check(num):
+        for d in deck1, deck2:
+            for t in ("revlog", "facts", "cards", "fsums"):
+                assert d.db.scalar("select count() from %s" % t) == num
+            assert len(d.models.all()) == num*2
+            # the default group and config have an id of 1, so always 1
+            assert len(d.groups.all()) == 1
+            assert len(d.groups.gconf) == 1
+            assert len(d.tags.all()) == num
+    check(1)
+    origLs = deck1.lastSync
+    assert client.sync() == "success"
+    # last sync times and mod times should agree
+    assert deck1.mod == deck2.mod
+    assert deck1.lastSync == deck2.lastSync
+    assert deck1.lastSync != origLs
+    # because everything was created separately it will be merged in. in
+    # actual use we use a full sync to ensure initial a common starting point.
+    check(2)
+    # repeating it does nothing
+    assert client.sync() == "noChanges"
+    # if we bump mod time, everything is copied across again because of the
+    # 600 second sync leeway. but the decks should remain the same.
+    deck1.save(mod=intTime()+2)
+    assert client.sync() == "success"
+    check(2)
 
-    for type in keys + keys2:
-        getattr(client, 'update'+type.capitalize())(proc[type])
+@nose.with_setup(setup_modified)
+def test_models():
+    test_sync()
+    # update model one
+    cm = deck1.models.current()
+    cm['name'] = "new"
+    cm['mod'] = intTime() + 1
+    deck1.save(mod=intTime()+1)
+    assert deck2.models.get(cm['id'])['name'] == "Basic"
+    assert client.sync() == "success"
+    assert deck2.models.get(cm['id'])['name'] == "new"
+    # deleting triggers a full sync
+    deck1.scm = deck2.scm = 0
+    deck1.models.rem(cm)
+    deck1.save(mod=intTime()+1)
+    assert client.sync() == "fullSync"
 
-    for type in keys + keys2:
-        getattr(server, 'update'+type.capitalize())(resp[type])
-    print "deck1", client.deck.db.all("select * from revlog")
-    print "deck2", server.deck.db.all("select * from revlog")
-    #client.process(loc, rem)
-
-# @nose.with_setup(setup_local, teardown)
-# def test_localsync_deck():
-#     # deck two was modified last
-#     assert deck2.modified > deck1.modified
-#     d2mod = deck2.modified
-#     assert deck1.lastSync == 0 and deck2.lastSync == 0
-#     client.sync()
-#     assert deck1.modified == deck2.modified
-#     assert deck1.modified <= deck1.lastSync
-#     assert deck1.lastSync == deck2.lastSync
-#     # ensure values are being synced
-#     deck1.lowPriority += u",foo"
-#     deck1.setModified()
-#     client.sync()
-#     assert "foo" in deck2.lowPriority
-#     assert deck1.modified == deck2.modified
-#     assert deck1.lastSync == deck2.lastSync
-#     deck2.description = u"newname"
-#     deck2.setModified()
-#     client.sync()
-#     assert deck1.description == u"newname"
-#     # the most recent change should take precedence
-#     deck1.description = u"foo"
-#     deck1.setModified()
-#     deck2.description = u"bar"
-#     deck2.setModified()
-#     client.sync()
-#     assert deck1.description == "bar"
-#     # answer a card to ensure stats & history are copied
-#     c = deck1.getCard()
-#     deck1.answerCard(c, 4)
-#     client.sync()
-#     assert deck2.db.scalar("select count(*) from revlog") == 1
-#     # make sure meta data is synced
-#     deck1.setVar("foo", 1)
-#     assert deck1.getInt("foo") == 1
-#     assert deck2.getInt("foo") is None
-#     client.sync()
-#     assert deck1.getInt("foo") == 1
-#     assert deck2.getInt("foo") == 1
-
-# @nose.with_setup(setup_local, teardown)
-# def test_localsync_models():
-#     client.sync()
-#     # add a model
-#     deck1.addModel(BasicModel())
-#     assert len(deck1.models) == 3
-#     assert len(deck2.models) == 2
-#     deck1.setVar("schemaMod", 0)
-#     client.sync()
-#     assert len(deck2.models) == 3
-#     assert deck1.currentModel.id == deck2.currentModel.id
-#     # delete the recently added model
-#     deck2.deleteModel(deck2.currentModel)
-#     assert len(deck2.models) == 2
-#     deck2.setVar("schemaMod", 0)
-#     client.sync()
-#     assert len(deck1.models) == 2
-#     assert deck1.currentModel.id == deck2.currentModel.id
-#     # make a card model inactive
-#     assert deck1.currentModel.cardModels[1].active == True
-#     deck2.currentModel.cardModels[1].active = False
-#     deck2.currentModel.setModified()
-#     deck2.flushMod()
-#     client.sync()
-#     assert deck1.currentModel.cardModels[1].active == False
-#     # remove a card model
-#     deck1.deleteCardModel(deck1.currentModel,
-#                           deck1.currentModel.cardModels[1])
-#     deck1.currentModel.setModified()
-#     deck1.setModified()
-#     assert len(deck1.currentModel.cardModels) == 1
-#     deck1.setVar("schemaMod", 0)
-#     client.sync()
-#     assert len(deck2.currentModel.cardModels) == 1
-#     # rename a field
-#     c = deck1.getCard()
-#     assert u"Front" in c.fact.keys()
-#     deck1.renameFieldModel(deck1.currentModel,
-#                            deck1.currentModel.fieldModels[0],
-#                            u"Sideways")
-#     client.sync()
-#     assert deck2.currentModel.fieldModels[0].name == u"Sideways"
-
-# @nose.with_setup(setup_local, teardown)
-# def test_localsync_factsandcards():
-#     assert deck1.factCount() == 1 and deck1.cardCount() == 2
-#     assert deck2.factCount() == 1 and deck2.cardCount() == 2
-#     client.sync()
-#     deck1.reset(); deck2.reset()
-#     assert deck1.factCount() == 2 and deck1.cardCount() == 4
-#     assert deck2.factCount() == 2 and deck2.cardCount() == 4
-#     # ensure the fact was copied across
-#     f1 = deck1.db.query(Fact).first()
-#     f2 = deck1.db.query(Fact).get(f1.id)
-#     f1['Front'] = u"myfront"
-#     f1.setModified()
-#     deck1.setModified()
-#     client.sync()
-#     deck1.rebuildCounts()
-#     deck2.rebuildCounts()
-#     f2 = deck1.db.query(Fact).get(f1.id)
-#     assert f2['Front'] == u"myfront"
-#     c1 = deck1.getCard()
-#     c2 = deck2.getCard()
-#     assert c1.id == c2.id
-
-# @nose.with_setup(setup_local, teardown)
-# def test_localsync_threeway():
-#     # deck1 (client) <-> deck2 (server) <-> deck3 (client)
-#     deck3 = Deck()
-#     client2 = SyncClient(deck3)
-#     server2 = SyncServer(deck2)
-#     client2.setServer(server2)
-#     client.sync()
-#     client2.sync()
-#     # add a new question
-#     f = deck1.newFact()
-#     f['Front'] = u"a"; f['Back'] = u"b"
-#     f = deck1.addFact(f)
-#     card = f.cards[0]
-#     client.sync()
-#     assert deck1.cardCount() == 6
-#     assert deck2.cardCount() == 6
-#     # check it propagates from server to deck3
-#     client2.sync()
-#     assert deck3.cardCount() == 6
-#     # delete a card on deck1
-#     deck1.deleteCard(card.id)
-#     client.sync()
-#     deck1.reset(); deck2.reset()
-#     assert deck1.cardCount() == 5
-#     assert deck2.cardCount() == 5
-#     # make sure the delete is now propagated from the server to deck3
-#     client2.sync()
-#     assert deck3.cardCount() == 5
-
-# def test_localsync_media():
-#     tmpdir = "/tmp/media-tests"
-#     try:
-#         shutil.rmtree(tmpdir)
-#     except OSError:
-#         pass
-#     shutil.copytree(os.path.join(os.path.dirname(__file__), "..",
-#                                  "tests/syncing/media-tests"),
-#                     tmpdir)
-#     deck1anki = os.path.join(tmpdir, "1.anki")
-#     deck2anki = os.path.join(tmpdir, "2.anki")
-#     deck1media = os.path.join(tmpdir, "1.media")
-#     deck2media = os.path.join(tmpdir, "2.media")
-#     setup_local((deck1anki, deck2anki))
-#     assert len(os.listdir(deck1media)) == 2
-#     assert len(os.listdir(deck2media)) == 1
-#     client.sync()
-#     # metadata should have been copied
-#     assert deck1.db.scalar("select count(1) from media") == 3
-#     assert deck2.db.scalar("select count(1) from media") == 3
-#     # copy local files
-#     copyLocalMedia(deck1, deck2)
-#     assert len(os.listdir(deck1media)) == 2
-#     assert len(os.listdir(deck2media)) == 3
-#     copyLocalMedia(deck2, deck1)
-#     assert len(os.listdir(deck1media)) == 3
-#     assert len(os.listdir(deck2media)) == 3
-#     # check delete
-#     os.unlink(os.path.join(deck1media, "22161b29b0c18e068038021f54eee1ee.png"))
-#     rebuildMediaDir(deck1)
-#     client.sync()
-#     assert deck1.db.scalar("select count(1) from media") == 3
-#     assert deck2.db.scalar("select count(1) from media") == 3
-
-# # Remote tests
-# ##########################################################################
-
-# # a replacement runCmd which just calls our server directly
-# def runCmd(action, *args, **kargs):
-#     #print action, kargs
-#     return server.unstuff(apply(getattr(server, action), tuple(args) +
-#                                 tuple(kargs.values())))
-
-# def setup_remote():
-#     setup_local()
-#     global client, server
-#     proxy = HttpSyncServerProxy("test", "foo")
-#     client = SyncClient(deck1)
-#     client.setServer(proxy)
-#     proxy.deckName = "test"
-#     proxy.runCmd = runCmd
-#     server = HttpSyncServer()
-#     server.deck = deck2
-#     server.decks = {"test": (deck2.modified, 0)}
-
-# @nose.with_setup(setup_remote, teardown)
-# def test_remotesync_fromserver():
-#     # deck two was modified last
-#     assert deck2.modified > deck1.modified
-#     client.sync()
-#     assert deck2.modified == deck1.modified
-#     # test deck vars
-#     deck1.setVar("foo", 1)
-#     client.sync()
-
-# @nose.with_setup(setup_remote, teardown)
-# def test_remotesync_toserver():
-#     deck1.setModified()
-#     client.sync()
-#     assert deck2.modified == deck1.modified
-
-# # Full sync
-# ##########################################################################
-
-# @nose.with_setup(setup_remote, teardown)
-# def test_formdata():
-#     global deck1
-#     (fd, name) = tempfile.mkstemp()
-#     deck1 = deck1.saveAs(name)
-#     deck1.setModified()
-#     client.deck = deck1
-#     client.prepareSync(0)
-#     client.prepareFullSync()
+@nose.with_setup(setup_modified)
+def test_facts():
+    test_sync()
+    # modifications should be synced
+    fid = deck1.db.scalar("select id from facts")
+    fact = deck1.getFact(fid)
+    assert fact['Front'] != "abc"
+    fact['Front'] = "abc"
+    fact.flush(mod=intTime()+1)
+    deck1.save(mod=intTime()+1)
+    assert client.sync() == "success"
+    assert deck2.getFact(fid)['Front'] == "abc"
+    # deletions too
+    deck1.remFacts([fid])
+    deck1.save(mod=intTime()+1)
+    assert client.sync() == "success"
+    assert not deck1.db.scalar("select 1 from facts where id = ?", fid)
+    assert not deck2.db.scalar("select 1 from facts where id = ?", fid)