Anki/tests/test_sync.py
Damien Elmes 362ae3eee2 initial work on sync refactor
Ported the sync code to the latest libanki structure. Key points:

No summary:

The old style had each side fetch ids and mod times, and required the client
to diff them and then request or bundle up the appropriate objects. Instead,
we now have each side send all of its changed objects, and it's the
responsibility of the other side to decide what needs to be merged and what
needs to be discarded. This lets us skip the separate summary step, which
saves scanning the tables twice and reduces server requests from 4 to 3.
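
Roughly, the exchange now looks like the sketch below. This is illustrative
only, with made-up names (Side, changedSince, merge) rather than the real
Syncer API, but it shows the shape: one scan per table on each side, and the
receiving side decides which copy of each object wins.

    class Side:
        def __init__(self, objects):
            # objects maps id -> (mod, payload)
            self.objects = objects
        def changedSince(self, lastSync):
            # single table scan: everything modified after the last sync
            return dict((id, o) for (id, o) in self.objects.items()
                        if o[0] > lastSync)
        def merge(self, remote):
            # the receiver decides what to keep: newer mod time wins
            for id, o in remote.items():
                local = self.objects.get(id)
                if local is None or o[0] > local[0]:
                    self.objects[id] = o

    def sync(client, server, lastSync):
        # both sides push all their changes; no summary/diff step needed
        server.merge(client.changedSince(lastSync))
        client.merge(server.changedSince(lastSync))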

Schema changes:

Certain operations that are difficult to merge (such as changing the number of
fields in a model, or deleting models or groups) result in a full sync. The
user is warned about it in the GUI before such schema-changing operations
execute.
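
The guard itself is just a comparison of schema modification times, as
test_changedSchema exercises below. A simplified sketch (the "fullSync" and
"noChanges" results match what the tests assert; the rest of the real
decision logic is omitted):

    def syncStatus(deck1, deck2):
        if deck1.scm != deck2.scm:
            # schemas diverged, e.g. a model or group was deleted;
            # incremental merging is no longer safe
            return "fullSync"
        if deck1.mod == deck2.mod:
            # nothing changed on either side since the last sync
            return "noChanges"
        # otherwise proceed with a normal incremental sync
        return "sync"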

Sync size:

For now, we don't try to deal with large incremental syncs. Because the cards,
facts and revlog can be large in memory (hundreds of megabytes in some cases),
they would have to be chunked for the benefit of devices with limited memory.

Currently findChanges() uses the full fact/card objects which we're planning
to send to the server. It could be rewritten to fetch a summary (just the id,
mod & rep columns), which would save some memory, and then compare against
blocks of a few hundred remote objects at a time; a rough sketch follows the
list below. However, it's a bit more complicated than that:

- If the local summary is huge it could exceed memory limits. Without a local
  summary we'd have to query the db for each record, which could be a lot
  slower.

- We currently accumulate a list of remote records we need to add locally.
  This list also has the potential to get too big. We would need to
  periodically commit the changes as we accumulate them.

- Merging a large number of changes is also potentially slow on mobile
  devices.
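
For concreteness, the block-wise comparison could look something like the
sketch below. None of this is implemented in this commit; the table and
column names come from the current schema, db.all() is assumed to be the same
helper style as the db.scalar() used in the tests, and the rest is
illustrative.

    def chunks(seq, size=250):
        # walk the remote summary a few hundred rows at a time
        for i in range(0, len(seq), size):
            yield seq[i:i+size]

    def findChangedIds(db, remoteSummary):
        # local summary: (id, mod) pairs only, instead of full objects
        local = dict(db.all("select id, mod from facts"))
        need = []
        for block in chunks(remoteSummary):
            for id, mod in block:
                if mod > local.get(id, 0):
                    # remote copy is newer; fetch the full object later
                    need.append(id)
            # in practice we'd also commit periodically so the accumulated
            # list (second point above) doesn't grow without bound
        return need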

Given that certain schema-changing operations require a full sync anyway, I
think it's best to concentrate on a chunked full sync for now; provided the
user syncs periodically, they should only run into the limits that force a
full sync after bulk editing operations.

Chunked partial syncing should be possible to add in the future without any
changes to the deck format.

Still to do:
- deck conf merging
- full syncing
- new http proxy
2011-09-08 12:50:42 +09:00

# coding: utf-8

import nose, os, tempfile, shutil, time
from tests.shared import assertException
from anki.errors import *
from anki import Deck
from anki.utils import intTime
from anki.sync import Syncer, LocalServer
from anki.facts import Fact
from anki.cards import Card
from tests.shared import getEmptyDeck

#import psyco; psyco.profile()

# Local tests
##########################################################################

deck1 = None
deck2 = None
client = None
server = None

def setup_basic(loadDecks=None):
    global deck1, deck2, client, server
    if loadDecks:
        deck1 = Deck(loadDecks[0], backup=False)
        deck2 = Deck(loadDecks[1], backup=False)
    else:
        deck1 = getEmptyDeck()
        # add a fact to deck 1
        f = deck1.newFact()
        f['Front'] = u"foo"; f['Back'] = u"bar"; f.tags = [u"foo"]
        deck1.addFact(f)
        # answer it
        deck1.reset(); deck1.sched.answerCard(deck1.sched.getCard(), 4)
        # repeat for deck2; sleep a tick so we have different ids
        deck2 = getEmptyDeck()
        f = deck2.newFact()
        f['Front'] = u"bar"; f['Back'] = u"bar"; f.tags = [u"bar"]
        deck2.addFact(f)
        deck2.reset(); deck2.sched.answerCard(deck2.sched.getCard(), 4)
        # start with same schema and sync time
        deck1.lastSync = deck2.lastSync = intTime() - 1
        deck1.scm = deck2.scm = 0
        # and same mod time, so sync does nothing
        deck1.save(); deck2.save()
    server = LocalServer(deck2)
    client = Syncer(deck1, server)

def setup_modified():
    setup_basic()
    # mark deck1 as changed
    deck1.save(mod=intTime()+1)

@nose.with_setup(setup_basic)
def test_nochange():
    assert client.sync() == "noChanges"

@nose.with_setup(setup_modified)
def test_changedSchema():
    deck1.scm += 1
    assert client.sync() == "fullSync"

@nose.with_setup(setup_modified)
def test_sync():
    def check(num):
        for d in deck1, deck2:
            for t in ("revlog", "facts", "cards", "fsums"):
                assert d.db.scalar("select count() from %s" % t) == num
            assert len(d.models.all()) == num*2
            # the default group and config have an id of 1, so always 1
            assert len(d.groups.all()) == 1
            assert len(d.groups.gconf) == 1
            assert len(d.tags.all()) == num
    check(1)
    origLs = deck1.lastSync
    assert client.sync() == "success"
    # last sync times and mod times should agree
    assert deck1.mod == deck2.mod
    assert deck1.lastSync == deck2.lastSync
    assert deck1.lastSync != origLs
    # because everything was created separately it will be merged in. in
    # actual use we do a full sync first to ensure a common starting point.
    check(2)
    # repeating it does nothing
    assert client.sync() == "noChanges"
    # if we bump mod time, everything is copied across again because of the
    # 600 second sync leeway. but the decks should remain the same.
    deck1.save(mod=intTime()+2)
    assert client.sync() == "success"
    check(2)

@nose.with_setup(setup_modified)
def test_models():
    test_sync()
    # update model one
    cm = deck1.models.current()
    cm['name'] = "new"
    cm['mod'] = intTime() + 1
    deck1.save(mod=intTime()+1)
    assert deck2.models.get(cm['id'])['name'] == "Basic"
    assert client.sync() == "success"
    assert deck2.models.get(cm['id'])['name'] == "new"
    # deleting a model triggers a full sync
    deck1.scm = deck2.scm = 0
    deck1.models.rem(cm)
    deck1.save(mod=intTime()+1)
    assert client.sync() == "fullSync"

@nose.with_setup(setup_modified)
def test_facts():
    test_sync()
    # modifications should be synced
    fid = deck1.db.scalar("select id from facts")
    fact = deck1.getFact(fid)
    assert fact['Front'] != "abc"
    fact['Front'] = "abc"
    fact.flush(mod=intTime()+1)
    deck1.save(mod=intTime()+1)
    assert client.sync() == "success"
    assert deck2.getFact(fid)['Front'] == "abc"
    # deletions too
    deck1.remFacts([fid])
    deck1.save(mod=intTime()+1)
    assert client.sync() == "success"
    assert not deck1.db.scalar("select 1 from facts where id = ?", fid)
    assert not deck2.db.scalar("select 1 from facts where id = ?", fid)