Anki/anki/groups.py
Damien Elmes 362ae3eee2 initial work on sync refactor
Ported the sync code to the latest libanki structure. Key points:

No summary:

The old style got each side to fetch ids+mod times and required the client to
diff them and then request or bundle up the appropriate objects. Instead, we now
get each side to send all changed objects, and it's the responsibility of the
other side to decide what needs to be merged and what needs to be discarded.
This allows us to skip a separate summary step, which saves scanning tables
twice, and allows us to reduce server requests from 4 to 3.

Schema changes:

Certain operations that are difficult to merge (such as changing the number of
fields in a model, or deleting models or groups) result in a full sync. The
user is warned about it in the GUI before such schema-changing operations
execute.

Sync size:

For now, we don't try to deal with large incremental syncs. Because the cards,
facts and revlog can be large in memory (hundreds of megabytes in some cases),
they would have to be chunked for the benefit of devices with a low amount of
memory.

Currently findChanges() uses the full fact/card objects which we're planning to
send to the server. It could be rewritten to fetch a summary (just the id, mod
& rep columns) which would save some memory, and then compare against blocks
of a few hundred remote objects at a time. However, it's a bit more
complicated than that:

- If the local summary is huge it could exceed memory limits. Without a local
  summary we'd have to query the db for each record, which could be a lot
  slower.

- We currently accumulate a list of remote records we need to add locally.
  This list also has the potential to get too big. We would need to
  periodically commit the changes as we accumulate them.

- Merging a large amount of changes is also potentially slow on mobile
  devices.

Given that certain schema-changing operations require a full sync anyway, I
think it's probably best to concentrate on a chunked full sync for now
instead: provided the user syncs periodically, it should not be easy to hit
the full sync limits except after bulk editing operations.

Chunked partial syncing should be possible to add in the future without any
changes to the deck format.

Still to do:
- deck conf merging
- full syncing
- new http proxy
2011-09-08 12:50:42 +09:00

211 lines
6 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import copy

import simplejson

from anki.utils import intTime, ids2str
from anki.consts import *
# fixmes:
# - make sure lists like new[delays] are not being shared by multiple groups
# - make sure all children have parents (create as necessary)
# - when renaming a group, top level properties should be added or removed as
# appropriate
# configuration only available to top level groups
defaultTopConf = {
    "newPerDay": 20,

    # Per-day counters, each stored as a two-element list. These are mutable
    # lists, so a group must get its own copies rather than sharing them.
    "newToday": [0, 0],   # currentDay, count
    "revToday": [0, 0],
    "lrnToday": [0, 0],
    "timeToday": [0, 0],  # currentDay, time in ms

    # Values taken from anki.consts.
    "newTodayOrder": NEW_TODAY_ORD,
    "newSpread": NEW_CARDS_DISTRIBUTE,

    "collapseTime": 1200,
    "repLim": 0,
    "timeLim": 600,
    "curModel": None,
}
# configuration available to all groups
defaultConf = {
    # settings stored under the 'new' key
    "new": {
        "delays": [1, 10],
        "ints": [1, 7, 4],
        "initialFactor": 2500,
    },
    # settings stored under the 'lapse' key
    "lapse": {
        "delays": [1, 10],
        "mult": 0,
        "minInt": 1,
        "relearn": True,
        "leechFails": 8,
        # type 0=suspend, 1=tagonly
        "leechAction": 0,
    },
    # settings stored under the 'cram' key
    "cram": {
        "delays": [1, 5, 10],
        "resched": True,
        "reset": True,
        "mult": 0,
        "minInt": 1,
    },
    # settings stored under the 'rev' key
    "rev": {
        "ease4": 1.3,
        "fuzz": 0.05,
        "minSpace": 1,
    },
    "maxTaken": 60,
    # starts at 0; GroupManager.save() overwrites it with intTime()
    "mod": 0,
}
class GroupManager(object):
    """Registry of a deck's groups and group configurations.

    Both registries are dicts keyed by the stringified id. They are decoded
    from JSON in load() and written back to the deck table in flush(). The
    current selection is kept in deck.conf under 'topGroup', 'curGroup' and
    'activeGroups'.
    """

    # Registry save/load
    #############################################################

    def __init__(self, deck):
        self.deck = deck

    def load(self, groups, gconf):
        "Replace both registries with the decoded JSON strings GROUPS and GCONF."
        self.groups = simplejson.loads(groups)
        self.gconf = simplejson.loads(gconf)
        self.changed = False

    def save(self, g=None):
        "Can be called with either a group or a group configuration."
        if g:
            # only bump the mod time when an object was handed in
            g['mod'] = intTime()
        self.changed = True

    def flush(self):
        "Write both registries to the deck table if they have changed."
        if not self.changed:
            return
        self.deck.db.execute("update deck set groups=?, gconf=?",
                             simplejson.dumps(self.groups),
                             simplejson.dumps(self.gconf))
        # everything is persisted now; without this reset every later flush
        # would rewrite both JSON blobs even when nothing changed
        self.changed = False

    # Group save/load
    #############################################################

    def id(self, name, create=True):
        "Add a group with NAME. Reuse group if already exists. Return id as int."
        wanted = name.lower()
        for key, g in self.groups.items():
            if g['name'].lower() == wanted:
                return int(key)
        if not create:
            return None
        if "::" not in name:
            # if it's a top level group, it gets the top level config. Deep
            # copy so the [day, count] lists are not shared between groups
            # (or with the defaults themselves).
            g = copy.deepcopy(defaultTopConf)
        else:
            # not top level; ensure all parents exist
            g = {}
            self._ensureParents(name)
        g['name'] = name
        g['conf'] = 1
        # ids come from intTime(1000); retry until one is not already taken
        newId = intTime(1000)
        while str(newId) in self.groups:
            newId = intTime(1000)
        g['id'] = newId
        self.groups[str(newId)] = g
        self.save(g)
        return int(newId)

    def rem(self, gid):
        "Move the cards and facts of group GID to group 1, then delete it."
        self.deck.modSchema()
        self.deck.db.execute("update cards set gid = 1 where gid = ?", gid)
        self.deck.db.execute("update facts set gid = 1 where gid = ?", gid)
        # NOTE(review): flush() stores groups as JSON in the deck table, and
        # self.groups is not touched here -- confirm that a 'groups' table
        # still exists and whether the registry entry should be dropped too.
        self.deck.db.execute("delete from groups where id = ?", gid)
        print("fixme: loop through models and update stale gid references")

    def allNames(self):
        "An unsorted list of all group names."
        return [g['name'] for g in self.groups.values()]

    def all(self):
        "A list of all groups."
        return self.groups.values()

    def allConf(self):
        "A list of all group config."
        return self.gconf.values()

    def _ensureParents(self, name):
        "Look up, creating if necessary, every ancestor of the group NAME."
        prefix = ""
        for part in name.split("::")[:-1]:
            prefix = prefix + "::" + part if prefix else part
            self.id(prefix)

    # Group utils
    #############################################################

    def name(self, gid):
        "The name of group GID. Raises KeyError if there is no such group."
        return self.groups[str(gid)]['name']

    def conf(self, gid):
        "The configuration used by group GID. Raises KeyError if missing."
        confId = self.groups[str(gid)]['conf']
        return self.gconf[str(confId)]

    def get(self, gid):
        "The group with id GID, or None if there is no such group."
        return self.groups.get(str(gid))

    def setGroup(self, cids, gid):
        "Move the cards with ids CIDS into group GID."
        # the db handle lives on the deck; the manager has no 'db' attribute
        self.deck.db.execute(
            "update cards set gid = ? where id in "+ids2str(cids), gid)

    def update(self, g):
        "Add or update an existing group. Used for syncing and merging."
        self.groups[str(g['id'])] = g
        # mark registry changed, but don't bump mod time
        self.save()

    def updateConf(self, g):
        "Add or update a group configuration without bumping its mod time."
        self.gconf[str(g['id'])] = g
        self.save()

    # Group selection
    #############################################################

    def top(self):
        "The current top level group as an object, and marks as modified."
        g = self.get(self.deck.conf['topGroup'])
        self.save(g)
        return g

    def active(self):
        "The currently active gids."
        return self.deck.conf['activeGroups']

    def selected(self):
        "The currently selected gid, or None if whole collection."
        return self.deck.conf['curGroup']

    def select(self, gid):
        "Select a new group. If gid is None, select whole collection."
        conf = self.deck.conf
        if not gid:
            conf['topGroup'] = 1
            conf['curGroup'] = None
            conf['activeGroups'] = []
            return
        # save the top level group
        name = self.groups[str(gid)]['name']
        conf['topGroup'] = self.topFor(name)
        # current group
        conf['curGroup'] = gid
        # and active groups (current + all children)
        childPrefix = name + "::"
        actv = [gid]
        for g in self.all():
            if g['name'].startswith(childPrefix):
                actv.append(g['id'])
        conf['activeGroups'] = actv

    def topFor(self, name):
        "The top level gid for NAME."
        return self.id(name.split("::")[0])