Anki/anki/importing/anki2.py
Commit 0cce540d83 by Damien Elmes: enforce schema version; allow importing+exporting v2 scheduler
To avoid all sorts of problems, we need to ensure cards scheduled with
the V2 scheduler are not studied in older clients. Unfortunately we
can't just bump the file's schema version, as existing clients will
freely import files created with newer versions. This patch changes
that, so things should be a bit easier in the future.

In the meantime, we need a way to prevent older clients from importing
files created with the V2 scheduler. To do this, we switch to using a
'collection.anki21' file in the archive, and include a dummy
collection.anki2 file.

The code has been tested with both deck and collection packages, but
exporting deck packages with scheduling info will remain disabled until
the V2 scheduler has had more testing.
2018-01-29 14:12:04 +10:00
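A package with scheduling info would then look roughly like this (a sketch
based on the description above, not an exhaustive listing of the archive):

    collection.anki21   # the real collection, requiring the V2 scheduler
    collection.anki2    # dummy collection telling older clients to upgrade
    media               # media manifest, as before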

# -*- coding: utf-8 -*-
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

import os
import unicodedata

from anki import Collection
from anki.utils import intTime, splitFields, joinFields, incGuid
from anki.importing.base import Importer
from anki.lang import _
from anki.lang import ngettext

GUID = 1
MID = 2
MOD = 3
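# For reference when reading the index juggling below: a row in the notes
# table is (id, guid, mid, mod, usn, tags, flds, sfld, csum, flags, data),
# so GUID/MID/MOD above are indices into that tuple, and note[0], note[4]
# and note[6] are the id, usn and fields respectively.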

class Anki2Importer(Importer):

    needMapper = False
    deckPrefix = None
    allowUpdate = True

    def run(self, media=None):
        self._prepareFiles()
        if media is not None:
            # Anki1 importer has provided us with a custom media folder
            self.src.media._dir = media
        try:
            self._import()
        finally:
            self.src.close(save=False)
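    # Illustrative usage (a sketch, not part of the original module): given
    # an open Collection `col` and a path to another collection file, a
    # caller would do something like:
    #
    #   imp = Anki2Importer(col, "/path/to/other.anki2")
    #   imp.run()
    #   print(imp.added, imp.updated, imp.dupes)
    #
    # added/updated/dupes are set by _importNotes() below.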
    def _prepareFiles(self):
        if self.file.endswith(".anki21") and self.col.schedVer() == 1:
            raise Exception("V2 scheduler must be enabled to import this file.")

        self.dst = self.col
        self.src = Collection(self.file)

    def _import(self):
        self._decks = {}
        if self.deckPrefix:
            id = self.dst.decks.id(self.deckPrefix)
            self.dst.decks.select(id)
        self._prepareTS()
        self._prepareModels()
        self._importNotes()
        self._importCards()
        self._importStaticMedia()
        self._postImport()
        self.dst.db.setAutocommit(True)
        self.dst.db.execute("vacuum")
        self.dst.db.execute("analyze")
        self.dst.db.setAutocommit(False)
    # Notes
    ######################################################################
    def _importNotes(self):
        # build guid -> (id,mod,mid) hash & map of existing note ids
        self._notes = {}
        existing = {}
        for id, guid, mod, mid in self.dst.db.execute(
            "select id, guid, mod, mid from notes"):
            self._notes[guid] = (id, mod, mid)
            existing[id] = True
        # we may need to rewrite the guid if the model schemas don't match,
        # so we need to keep track of the changes for the card import stage
        self._changedGuids = {}
        # we ignore updates to changed schemas. we need to note the ignored
        # guids, so we avoid importing invalid cards
        self._ignoredGuids = {}
        # iterate over source collection
        add = []
        update = []
        dirty = []
        usn = self.dst.usn()
        dupes = 0
        dupesIgnored = []
        for note in self.src.db.execute(
            "select * from notes"):
            # turn the db result into a mutable list
            note = list(note)
            shouldAdd = self._uniquifyNote(note)
            if shouldAdd:
                # ensure id is unique
                while note[0] in existing:
                    note[0] += 999
                existing[note[0]] = True
                # bump usn
                note[4] = usn
                # update media references in case of dupes
                note[6] = self._mungeMedia(note[MID], note[6])
                add.append(note)
                dirty.append(note[0])
                # note that we have added the guid
                self._notes[note[GUID]] = (note[0], note[3], note[MID])
            else:
                # a duplicate or changed schema - safe to update?
                dupes += 1
                if self.allowUpdate:
                    oldNid, oldMod, oldMid = self._notes[note[GUID]]
                    # will update if incoming note more recent
                    if oldMod < note[MOD]:
                        # safe if note types identical
                        if oldMid == note[MID]:
                            # incoming note should use existing id
                            note[0] = oldNid
                            note[4] = usn
                            note[6] = self._mungeMedia(note[MID], note[6])
                            update.append(note)
                            dirty.append(note[0])
                        else:
                            dupesIgnored.append("%s: %s" % (
                                self.col.models.get(oldMid)['name'],
                                note[6].replace("\x1f", ",")
                            ))
                            self._ignoredGuids[note[GUID]] = True
        if dupes:
            up = len(update)
            self.log.append(_("Updated %(a)d of %(b)d existing notes.") % dict(
                a=up, b=dupes))
            if dupesIgnored:
                self.log.append(
                    _("Some updates were ignored because note type has changed:"))
                self.log.extend(dupesIgnored)
        # export info for calling code
        self.dupes = dupes
        self.added = len(add)
        self.updated = len(update)
        # add to col
        self.dst.db.executemany(
            "insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
            add)
        self.dst.db.executemany(
            "insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
            update)
        self.dst.updateFieldCache(dirty)
        self.dst.tags.registerNotes(dirty)
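    # To summarise the duplicate handling above (illustrative, restating the
    # code): an incoming note whose guid already exists locally is written
    # over the existing row via "insert or replace" if it is newer and its
    # note type is unchanged; if the note type's schema changed, the update
    # is skipped and reported in the log instead.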
    # determine if note is a duplicate, and adjust mid and/or guid as required
    # returns true if note should be added
    def _uniquifyNote(self, note):
        origGuid = note[GUID]
        srcMid = note[MID]
        dstMid = self._mid(srcMid)
        # duplicate schemas?
        if srcMid == dstMid:
            return origGuid not in self._notes
        # differing schemas and note doesn't exist?
        note[MID] = dstMid
        if origGuid not in self._notes:
            return True
        # schema changed; don't import
        self._ignoredGuids[origGuid] = True
        return False
    # Models
    ######################################################################
    # Models in the two decks may share an ID but not a schema, so we need to
    # compare the field & template signature rather than just rely on ID. If
    # the schemas don't match, we increment the mid and try again, creating a
    # new model if necessary.

    def _prepareModels(self):
        "Prepare index of schema hashes."
        self._modelMap = {}

    def _mid(self, srcMid):
        "Return local id for remote MID."
        # already processed this mid?
        if srcMid in self._modelMap:
            return self._modelMap[srcMid]
        mid = srcMid
        srcModel = self.src.models.get(srcMid)
        srcScm = self.src.models.scmhash(srcModel)
        while True:
            # missing from target col?
            if not self.dst.models.have(mid):
                # copy it over
                model = srcModel.copy()
                model['id'] = mid
                model['usn'] = self.col.usn()
                self.dst.models.update(model)
                break
            # there's an existing model; do the schemas match?
            dstModel = self.dst.models.get(mid)
            dstScm = self.dst.models.scmhash(dstModel)
            if srcScm == dstScm:
                # copy styling changes over if newer
                if srcModel['mod'] > dstModel['mod']:
                    model = srcModel.copy()
                    model['id'] = mid
                    model['usn'] = self.col.usn()
                    self.dst.models.update(model)
                break
            # as they don't match, try next id
            mid += 1
        # save map and return new mid
        self._modelMap[srcMid] = mid
        return mid
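    # For example (an illustrative scenario, not taken from real data): if
    # the source has a model with mid 1000 whose scmhash differs from the
    # destination's model 1000, _mid() probes 1001, 1002, ... until it finds
    # either a model with a matching schema hash or an unused id, at which
    # point the source model is copied over under that id.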
    # Decks
    ######################################################################

    def _did(self, did):
        "Given did in src col, return local id."
        # already converted?
        if did in self._decks:
            return self._decks[did]
        # get the name in src
        g = self.src.decks.get(did)
        name = g['name']
        # if there's a prefix, replace the top level deck
        if self.deckPrefix:
            tmpname = "::".join(name.split("::")[1:])
            name = self.deckPrefix
            if tmpname:
                name += "::" + tmpname
        # manually create any parents so we can pull in descriptions
        head = ""
        for parent in name.split("::")[:-1]:
            if head:
                head += "::"
            head += parent
            idInSrc = self.src.decks.id(head)
            self._did(idInSrc)
        # create in local
        newid = self.dst.decks.id(name)
        # pull conf over
        if 'conf' in g and g['conf'] != 1:
            conf = self.src.decks.getConf(g['conf'])
            self.dst.decks.save(conf)
            self.dst.decks.updateConf(conf)
            g2 = self.dst.decks.get(newid)
            g2['conf'] = g['conf']
            self.dst.decks.save(g2)
        # save desc
        deck = self.dst.decks.get(newid)
        deck['desc'] = g['desc']
        self.dst.decks.save(deck)
        # add to deck map and return
        self._decks[did] = newid
        return newid
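    # For example (an illustrative case): with deckPrefix="Imported", a
    # source deck named "Japanese::Verbs" is renamed to "Imported::Verbs"
    # before being created in the destination, and the parent "Imported" is
    # created first by the loop above.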
    # Cards
    ######################################################################

    def _importCards(self):
        # build map of (guid, ord) -> cid and used id cache
        self._cards = {}
        existing = {}
        for guid, ord, cid in self.dst.db.execute(
            "select f.guid, c.ord, c.id from cards c, notes f "
            "where c.nid = f.id"):
            existing[cid] = True
            self._cards[(guid, ord)] = cid
        # loop through src
        cards = []
        revlog = []
        cnt = 0
        usn = self.dst.usn()
        aheadBy = self.src.sched.today - self.dst.sched.today
        for card in self.src.db.execute(
            "select f.guid, f.mid, c.* from cards c, notes f "
            "where c.nid = f.id"):
            guid = card[0]
            if guid in self._changedGuids:
                guid = self._changedGuids[guid]
            if guid in self._ignoredGuids:
                continue
            # does the card's note exist in dst col?
            if guid not in self._notes:
                continue
            dnid = self._notes[guid]
            # does the card already exist in the dst col?
            ord = card[5]
            if (guid, ord) in self._cards:
                # fixme: in future, could update if newer mod time
                continue
            # doesn't exist. strip off note info, and save src id for later.
            # after the slice, the indices below refer to the cards table
            # columns: 0=id, 1=nid, 2=did, 3=ord, 4=mod, 5=usn, 6=type,
            # 7=queue, 8=due, 14=odue, 15=odid
            card = list(card[2:])
            scid = card[0]
            # ensure the card id is unique
            while card[0] in existing:
                card[0] += 999
            existing[card[0]] = True
            # update cid, nid, etc
            card[1] = self._notes[guid][0]
            card[2] = self._did(card[2])
            card[4] = intTime()
            card[5] = usn
            # review cards have a due date relative to collection
            if card[7] in (2, 3) or card[6] == 2:
                card[8] -= aheadBy
            # if odid true, convert card from filtered to normal
            if card[15]:
                # odid
                card[15] = 0
                # odue
                card[8] = card[14]
                card[14] = 0
                # queue
                if card[6] == 1: # type
                    card[7] = 0
                else:
                    card[7] = card[6]
                # type
                if card[6] == 1:
                    card[6] = 0
            cards.append(card)
            # we need to import revlog, rewriting card ids and bumping usn
            for rev in self.src.db.execute(
                "select * from revlog where cid = ?", scid):
                rev = list(rev)
                rev[1] = card[0]
                rev[2] = self.dst.usn()
                revlog.append(rev)
            cnt += 1
        # apply
        self.dst.db.executemany("""
insert or ignore into cards values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", cards)
        self.dst.db.executemany("""
insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
        self.log.append(ngettext("%d card imported.", "%d cards imported.", cnt) % cnt)
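    # A worked example of the due-date shift above (numbers invented for
    # illustration): if the source collection's scheduler is on day 300 and
    # the destination's is on day 280, aheadBy is 20, so a review card due
    # on day 305 in the source becomes due on day 285 locally - still five
    # days in the future relative to its collection.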
    # Media
    ######################################################################

    # note: this func only applies to imports of .anki2. for .apkg files, the
    # apkg importer does the copying
    def _importStaticMedia(self):
        # Import any '_foo' prefixed media files regardless of whether
        # they're used on notes or not
        dir = self.src.media.dir()
        if not os.path.exists(dir):
            return
        for fname in os.listdir(dir):
            if fname.startswith("_") and not self.dst.media.have(fname):
                self._writeDstMedia(fname, self._srcMediaData(fname))
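    # Files like "_styles.css" or "_font.ttf" (illustrative names) fall
    # under this rule: the leading underscore marks media referenced
    # directly from templates rather than from note fields, so we copy them
    # even though no field mentions them.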
    def _mediaData(self, fname, dir=None):
        if not dir:
            dir = self.src.media.dir()
        path = os.path.join(dir, fname)
        try:
            with open(path, "rb") as f:
                return f.read()
        except (IOError, OSError):
            return

    def _srcMediaData(self, fname):
        "Data for FNAME in src collection."
        return self._mediaData(fname, self.src.media.dir())

    def _dstMediaData(self, fname):
        "Data for FNAME in dst collection."
        return self._mediaData(fname, self.dst.media.dir())

    def _writeDstMedia(self, fname, data):
        path = os.path.join(self.dst.media.dir(),
                            unicodedata.normalize("NFC", fname))
        try:
            with open(path, "wb") as f:
                f.write(data)
        except (OSError, IOError):
            # the user likely used subdirectories
            pass
    def _mungeMedia(self, mid, fields):
        fields = splitFields(fields)
        def repl(match):
            fname = match.group("fname")
            srcData = self._srcMediaData(fname)
            dstData = self._dstMediaData(fname)
            if not srcData:
                # file was not in source, ignore
                return match.group(0)
            # if model-local file exists from a previous import, use that
            name, ext = os.path.splitext(fname)
            lname = "%s_%s%s" % (name, mid, ext)
            if self.dst.media.have(lname):
                return match.group(0).replace(fname, lname)
            # if missing or the same, pass unmodified
            elif not dstData or srcData == dstData:
                # need to copy?
                if not dstData:
                    self._writeDstMedia(fname, srcData)
                return match.group(0)
            # exists but does not match, so we need to dedupe
            self._writeDstMedia(lname, srcData)
            return match.group(0).replace(fname, lname)
        for i in range(len(fields)):
            fields[i] = self.dst.media.transformNames(fields[i], repl)
        return joinFields(fields)
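    # For example (illustrative names): if both collections contain a
    # "bird.mp3" with differing contents and the incoming note's model id is
    # 1234, the source file is written out as "bird_1234.mp3" and the field
    # reference is rewritten to point at it, so neither copy is clobbered.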
    # Post-import cleanup
    ######################################################################

    def _postImport(self):
        for did in list(self._decks.values()):
            self.col.sched.maybeRandomizeDeck(did)
        # make sure new position is correct
        self.dst.conf['nextPos'] = self.dst.db.scalar(
            "select max(due)+1 from cards where type = 0") or 0
        self.dst.save()
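    # nextPos feeds the due number given to the next new card added to the
    # collection; recomputing it from max(due) over new cards (type = 0)
    # keeps freshly imported new cards and locally added ones in a single
    # ordering.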