Ported the sync code to the latest libanki structure. Key points:

No summary: the old style got each side to fetch ids+mod times and required
the client to diff them and then request or bundle up the appropriate
objects. Instead, we now get each side to send all changed objects, and it's
the responsibility of the other side to decide what needs to be merged and
what needs to be discarded. This allows us to skip a separate summary step,
which saves scanning tables twice, and allows us to reduce server requests
from 4 to 3.

Schema changes: certain operations that are difficult to merge (such as
changing the number of fields in a model, or deleting models or groups)
result in a full sync. The user is warned about it in the GUI before such
schema-changing operations execute.

Sync size: for now, we don't try to deal with large incremental syncs.
Because the cards, facts and revlog can be large in memory (hundreds of
megabytes in some cases), they would have to be chunked for the benefit of
devices with a low amount of memory. Currently findChanges() uses the full
fact/card objects which we're planning to send to the server. It could be
rewritten to fetch a summary (just the id, mod & rep columns) which would
save some memory, and then compare against blocks of a few hundred remote
objects at a time. However, it's a bit more complicated than that:

- If the local summary is huge it could exceed memory limits. Without a
  local summary we'd have to query the db for each record, which could be a
  lot slower.
- We currently accumulate a list of remote records we need to add locally.
  This list also has the potential to get too big. We would need to
  periodically commit the changes as we accumulate them.
- Merging a large amount of changes is also potentially slow on mobile
  devices.

Given that certain schema-changing operations require a full sync anyway,
it's probably best to concentrate on a chunked full sync for now: provided
the user syncs periodically, it should not be easy to hit the full sync
limits except after bulk editing operations. Chunked partial syncing should
be possible to add in the future without any changes to the deck format.
A sketch of the new exchange follows.

Still to do:
- deck conf merging
- full syncing
- new http proxy
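A minimal sketch of the exchange described above, in Python. All names here
(changedObjects, applyChanges, mergeObject, get) are invented for
illustration; they are not the real sync API:

    def exchange(local, remote, lastSync):
        # send every object we changed since the last sync; the server
        # replies with every object it changed, instead of a summary to diff
        toMerge = remote.applyChanges(local.changedObjects(since=lastSync))
        # the receiving side decides what to merge and what to discard,
        # typically keeping whichever copy was modified more recently
        for obj in toMerge:
            ours = local.get(obj.id)
            if ours is None or ours.mod < obj.mod:
                local.mergeObject(obj)
            # else: our copy is newer; discard the remote one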
# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

"""\
Importing support
==============================

To import, a mapping is created of the form: [FieldModel, ...]. The mapping
may be extended by calling code if a file has more fields. To ignore a
particular FieldModel, replace it with None. A special number 0 denotes a
tags field. The same field model should not occur more than once."""
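
# For example, a hypothetical mapping for a three-column file (the field
# model name is illustrative):
#
#   imp.mapping = [frontFieldModel, 0, None]
#
# Column 1 fills the Front field, column 2 is treated as the tags field (0),
# and column 3 is ignored (None).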

import time
#from anki.cards import cardsTable
#from anki.facts import factsTable, fieldsTable
from anki.lang import _
# canonifyTags and genID are used below but were missing from the imports
from anki.utils import fieldChecksum, ids2str, canonifyTags, genID
from anki.errors import *
#from anki.deck import NEW_CARDS_RANDOM

# FIXME: when importing an anki file, if any revlog entries are less than the
# last sync time, we need to bump the deck schema

# Base importer
##########################################################################

class ForeignCard(object):
    "A temporary object storing fields and attributes."
    def __init__(self):
        self.fields = []
        self.tags = u""

class Importer(object):

    needMapper = True
    tagDuplicates = False
    # if set, update instead of regular importing
    # (foreignCardFieldIndex, fieldModelId)
    updateKey = None
    multipleCardsAllowed = True
    needDelimiter = False

    def __init__(self, deck, file):
        self.file = file
        self._model = deck.currentModel
        self._mapping = None
        self.log = []
        self.deck = deck
        self.total = 0
        self.tagsToAdd = u""

    def doImport(self):
        "Import."
        if self.updateKey is not None:
            return self.doUpdate()
        random = self.deck.newCardOrder == NEW_CARDS_RANDOM
        num = 6
        if random:
            num += 1
        c = self.foreignCards()
        if self.importCards(c):
            self.deck.updateCardTags(self.cardIds)
            if random:
                self.deck.randomizeNewCards(self.cardIds)
        if c:
            self.deck.setModified()

    def doUpdate(self):
        # grab the data from the external file
        cards = self.foreignCards()
        # grab data from db
        fields = self.deck.db.all("""
select factId, value from fields where fieldModelId = :id
and value != ''""", id=self.updateKey[1])
        # hash it
        vhash = {}
        fids = []
        for (fid, val) in fields:
            fids.append(fid)
            vhash[val] = fid
        # prepare tags
        tagsIdx = None
        try:
            tagsIdx = self.mapping.index(0)
            for c in cards:
                c.tags = canonifyTags(self.tagsToAdd + " " + c.fields[tagsIdx])
        except ValueError:
            pass
        # look for matches
        upcards = []
        newcards = []
        for c in cards:
            v = c.fields[self.updateKey[0]]
            if v in vhash:
                # ignore empty keys
                if v:
                    # fid, card
                    upcards.append((vhash[v], c))
            else:
                newcards.append(c)
        # update fields
        for fm in self.model.fieldModels:
            if fm.id == self.updateKey[1]:
                # don't update key
                continue
            try:
                index = self.mapping.index(fm)
            except ValueError:
                # not mapped
                continue
            data = [{'fid': fid,
                     'fmid': fm.id,
                     'v': c.fields[index],
                     'chk': self.maybeChecksum(c.fields[index], fm.unique)}
                    for (fid, c) in upcards]
            self.deck.db.execute("""
update fields set value = :v, chksum = :chk where factId = :fid
and fieldModelId = :fmid""", data)
        # update tags
        if tagsIdx is not None:
            data = [{'fid': fid,
                     't': c.fields[tagsIdx]}
                    for (fid, c) in upcards]
            self.deck.db.execute(
                "update facts set tags = :t where id = :fid",
                data)
        # rebuild caches
        cids = self.deck.db.column0(
            "select id from cards where factId in %s" %
            ids2str(fids))
        self.deck.updateCardTags(cids)
        self.deck.updateCardsFromFactIds(fids)
        self.total = len(cards)
        self.deck.setModified()
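
    # A hypothetical subclass using update mode (field model name
    # illustrative):
    #
    #   class MyUpdater(TextImporter):
    #       updateKey = (0, keyFieldModel.id)
    #
    # means "treat column 0 of the foreign file as the key; when its value
    # matches an existing fact's value for that field model, update the fact
    # in place instead of adding a new one."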

    def fields(self):
        "The number of fields."
        return 0

    def maybeChecksum(self, data, unique):
        if not unique:
            return ""
        return fieldChecksum(data)
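
    # A checksum is only computed for fields marked unique; everything else
    # stores an empty string in the chksum column. Usage sketch (value
    # illustrative):
    #
    #   imp.maybeChecksum(u"bonjour", True)   # -> fieldChecksum(u"bonjour")
    #   imp.maybeChecksum(u"bonjour", False)  # -> ""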

    def foreignCards(self):
        "Return a list of foreign cards for importing."
        assert 0

    def resetMapping(self):
        "Reset mapping to default."
        numFields = self.fields()
        m = [f for f in self.model.fieldModels]
        m.append(0)
        rem = max(0, self.fields() - len(m))
        m += [None] * rem
        del m[numFields:]
        self._mapping = m

    def getMapping(self):
        if not self._mapping:
            self.resetMapping()
        return self._mapping

    def setMapping(self, mapping):
        self._mapping = mapping

    mapping = property(getMapping, setMapping)

    def getModel(self):
        return self._model

    def setModel(self, model):
        self._model = model
        # update the mapping for the new model
        self._mapping = None
        self.getMapping()

    model = property(getModel, setModel)
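
    # Note the property wiring above: assigning a new model clears the cached
    # mapping and immediately rebuilds the default one, so the mapping can
    # never refer to FieldModels from a previous model:
    #
    #   imp.model = otherModel   # triggers setModel(), resets _mapping
    #   imp.mapping              # default mapping for otherModel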

    def importCards(self, cards):
        "Convert each card into a fact, apply attributes and add to deck."
        # ensure all unique and required fields are mapped
        for fm in self.model.fieldModels:
            if fm.required or fm.unique:
                if fm not in self.mapping:
                    raise ImportFormatError(
                        type="missingRequiredUnique",
                        info=_("Missing required/unique field '%(field)s'") %
                        {'field': fm.name})
        active = 0
        for cm in self.model.cardModels:
            if cm.active:
                active += 1
        if active > 1 and not self.multipleCardsAllowed:
            raise ImportFormatError(type="tooManyCards",
                                    info=_("""\
The current importer only supports a single active card template. Please \
disable all but one card template."""))
        # strip invalid cards
        cards = self.stripInvalid(cards)
        cards = self.stripOrTagDupes(cards)
        self.cardIds = []
        if cards:
            self.addCards(cards)
        return cards
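
    # For example, importing into a model whose "Expression" field is marked
    # required (field name illustrative), with a mapping that drops it:
    #
    #   imp.mapping = [None, 0]
    #   imp.doImport()  # raises ImportFormatError(type="missingRequiredUnique")
    #
    # The check runs before anything is written to the deck.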

    def addCards(self, cards):
        "Add facts in bulk from foreign cards."
        # map tags field to attr
        try:
            idx = self.mapping.index(0)
            for c in cards:
                c.tags += " " + c.fields[idx]
        except ValueError:
            pass
        # add facts
        factIds = [genID() for n in range(len(cards))]
        factCreated = {}
        def fudgeCreated(d, tmp=[]):
            # `tmp` is a mutable default argument, so it persists across
            # calls and acts as a counter: each fact's creation time is
            # bumped 0.0001s past the previous one to keep ordering stable
            if not tmp:
                tmp.append(time.time())
            else:
                tmp[0] += 0.0001
            d['created'] = tmp[0]
            factCreated[d['id']] = d['created']
            return d
        self.deck.db.execute(
            factsTable.insert(),
            [fudgeCreated({'modelId': self.model.id,
                           'tags': canonifyTags(self.tagsToAdd + " " +
                                                cards[n].tags),
                           'id': factIds[n]}) for n in range(len(cards))])
        self.deck.db.execute("""
delete from factsDeleted
where factId in (%s)""" % ",".join([str(s) for s in factIds]))
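        # A standalone sketch of the same mutable-default idiom used by
        # fudgeCreated above (illustrative, not used by the importer):
        #
        #   def tick(state=[]):
        #       if not state:
        #           state.append(0.0)
        #       else:
        #           state[0] += 0.0001
        #       return state[0]
        #
        #   tick(), tick(), tick()   # -> (0.0, 0.0001, 0.0002)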
        # add all the fields
        for fm in self.model.fieldModels:
            try:
                index = self.mapping.index(fm)
            except ValueError:
                index = None
            data = [{'factId': factIds[m],
                     'fieldModelId': fm.id,
                     'ordinal': fm.ordinal,
                     'id': genID(),
                     'value': (index is not None and
                               cards[m].fields[index] or u""),
                     'chksum': self.maybeChecksum(
                         index is not None and
                         cards[m].fields[index] or u"", fm.unique)
                     }
                    for m in range(len(cards))]
            self.deck.db.execute(fieldsTable.insert(),
                                 data)
        # and cards
        active = 0
        for cm in self.model.cardModels:
            if cm.active:
                active += 1
                data = [self.addMeta({
                    'id': genID(),
                    'factId': factIds[m],
                    'factCreated': factCreated[factIds[m]],
                    'cardModelId': cm.id,
                    'ordinal': cm.ordinal,
                    'question': u"",
                    'answer': u""
                    }, cards[m]) for m in range(len(cards))]
                self.deck.db.execute(cardsTable.insert(),
                                     data)
        self.deck.updateCardsFromFactIds(factIds)
        self.total = len(factIds)

    def addMeta(self, data, card):
        "Add any scheduling metadata to cards."
        if 'fields' in card.__dict__:
            del card.fields
        t = data['factCreated'] + data['ordinal'] * 0.00001
        data['created'] = t
        data['modified'] = t
        data['due'] = t
        data.update(card.__dict__)
        data['tags'] = u""
        self.cardIds.append(data['id'])
        data['combinedDue'] = data['due']
        if data.get('successive', 0):
            t = 1
        elif data.get('reps', 0):
            t = 0
        else:
            t = 2
        data['type'] = t
        data['queue'] = t
        return data
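
    # Reading the branches above: a card with successful reviews behind it
    # becomes type/queue 1 (successive/review), one with reps but no
    # successes becomes 0 (failed), and an untouched card becomes 2 (new),
    # matching the 1.x scheduler's type codes.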

    def stripInvalid(self, cards):
        return [c for c in cards if self.cardIsValid(c)]

    def cardIsValid(self, card):
        fieldNum = len(card.fields)
        for n in range(len(self.mapping)):
            if self.mapping[n] and self.mapping[n].required:
                if fieldNum <= n or not card.fields[n].strip():
                    self.log.append("Fact is missing field '%s': %s" %
                                    (self.mapping[n].name,
                                     ", ".join(card.fields)))
                    return False
        return True

    def stripOrTagDupes(self, cards):
        # build a cache of items
        self.uniqueCache = {}
        for field in self.mapping:
            if field and field.unique:
                self.uniqueCache[field.id] = self.getUniqueCache(field)
        return [c for c in cards if self.cardIsUnique(c)]

    def getUniqueCache(self, field):
        "Return a dict with all fields, to test for uniqueness."
        return dict(self.deck.db.all(
            "select value, 1 from fields where fieldModelId = :fmid",
            fmid=field.id))

    def cardIsUnique(self, card):
        fieldsAsTags = []
        for n in range(len(self.mapping)):
            if self.mapping[n] and self.mapping[n].unique:
                if card.fields[n] in self.uniqueCache[self.mapping[n].id]:
                    if not self.tagDuplicates:
                        self.log.append("Fact has duplicate '%s': %s" %
                                        (self.mapping[n].name,
                                         ", ".join(card.fields)))
                        return False
                    fieldsAsTags.append(self.mapping[n].name.replace(" ", "-"))
                else:
                    self.uniqueCache[self.mapping[n].id][card.fields[n]] = 1
        if fieldsAsTags:
            card.tags += u" Duplicate:" + (
                "+".join(fieldsAsTags))
            card.tags = canonifyTags(card.tags)
        return True
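
    # Example of the two behaviours above (field name illustrative): with
    # tagDuplicates = False, a card whose unique "Front" value already exists
    # in the deck is logged and dropped; with tagDuplicates = True it is kept
    # and gains a tag like "Duplicate:Front" instead.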

# Export modules
##########################################################################

from anki.importing.csvfile import TextImporter
from anki.importing.anki10 import Anki10Importer
from anki.importing.mnemosyne10 import Mnemosyne10Importer
from anki.importing.supermemo_xml import SupermemoXmlImporter

Importers = (
    (_("Text separated by tabs or semicolons (*)"), TextImporter),
    (_("Anki Deck (*.anki)"), Anki10Importer),
    (_("Mnemosyne Deck (*.mem)"), Mnemosyne10Importer),
    (_("Supermemo XML export (*.xml)"), SupermemoXmlImporter),
)
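
# A hypothetical usage sketch (deck construction elided; the path and tuple
# index are illustrative):
#
#   from anki.importing import Importers
#   desc, klass = Importers[0]        # TextImporter
#   imp = klass(deck, u"notes.txt")
#   imp.doImport()
#   print "imported %d facts" % imp.total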