mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 22:12:21 -04:00
376 lines
14 KiB
Python
376 lines
14 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright: Damien Elmes <anki@ichi2.net>
|
|
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
|
|
|
import os
|
|
from anki import Collection
|
|
from anki.utils import intTime, splitFields, joinFields, checksum, guid64
|
|
from anki.importing.base import Importer
|
|
from anki.lang import _
|
|
from anki.lang import ngettext
|
|
|
|
#
|
|
# Import a .anki2 file into the current collection. Used for migration from
|
|
# 1.x, shared decks, and import from a packaged deck.
|
|
#
|
|
# We can't rely on internal ids, so we:
|
|
# - compare notes by guid
|
|
# - compare models by schema signature
|
|
# - compare cards by note guid + ordinal
|
|
# - compare decks by name
|
|
#
|
|
|
|
class Anki2Importer(Importer):
|
|
|
|
needMapper = False
|
|
deckPrefix = None
|
|
allowUpdate = True
|
|
|
|
def run(self, media=None):
|
|
self._prepareFiles()
|
|
if media is not None:
|
|
# Anki1 importer has provided us with a custom media folder
|
|
self.src.media._dir = media
|
|
try:
|
|
self._import()
|
|
finally:
|
|
self.src.close(save=False)
|
|
|
|
def _prepareFiles(self):
|
|
self.dst = self.col
|
|
self.src = Collection(self.file)
|
|
|
|
def _import(self):
|
|
self._decks = {}
|
|
if self.deckPrefix:
|
|
id = self.dst.decks.id(self.deckPrefix)
|
|
self.dst.decks.select(id)
|
|
self._prepareTS()
|
|
self._prepareModels()
|
|
self._importNotes()
|
|
self._importCards()
|
|
self._importStaticMedia()
|
|
self._postImport()
|
|
self.dst.db.execute("vacuum")
|
|
self.dst.db.execute("analyze")
|
|
|
|
# Notes
|
|
######################################################################
|
|
# - should note new for wizard
|
|
|
|
def _importNotes(self):
|
|
# build guid -> (id,mod,mid) hash
|
|
self._notes = {}
|
|
existing = {}
|
|
for id, guid, mod, mid in self.dst.db.execute(
|
|
"select id, guid, mod, mid from notes"):
|
|
self._notes[guid] = (id, mod, mid)
|
|
existing[id] = True
|
|
# we may need to rewrite the guid if the model schemas don't match,
|
|
# so we need to keep track of the changes for the card import stage
|
|
self._changedGuids = {}
|
|
# iterate over source collection
|
|
add = []
|
|
dirty = []
|
|
usn = self.dst.usn()
|
|
dupes = 0
|
|
for note in self.src.db.execute(
|
|
"select * from notes"):
|
|
# turn the db result into a mutable list
|
|
note = list(note)
|
|
guid, mid = note[1:3]
|
|
canUseExisting = False
|
|
alreadyHaveGuid = False
|
|
# do we have the same guid?
|
|
if guid in self._notes:
|
|
alreadyHaveGuid = True
|
|
# and do they share the same model id?
|
|
if self._notes[guid][2] == mid:
|
|
# and do they share the same schema?
|
|
srcM = self.src.models.get(mid)
|
|
dstM = self.dst.models.get(self._notes[guid][2])
|
|
if (self.src.models.scmhash(srcM) ==
|
|
self.src.models.scmhash(dstM)):
|
|
# then it's safe to treat as an exact duplicate
|
|
canUseExisting = True
|
|
# if we can't reuse an existing one, we'll need to add new
|
|
if not canUseExisting:
|
|
# get corresponding local model
|
|
lmid = self._mid(mid)
|
|
# ensure id is unique
|
|
while note[0] in existing:
|
|
note[0] += 999
|
|
existing[note[0]] = True
|
|
# rewrite internal ids, models, etc
|
|
note[2] = lmid
|
|
note[4] = usn
|
|
# update media references in case of dupes
|
|
note[6] = self._mungeMedia(mid, note[6])
|
|
add.append(note)
|
|
dirty.append(note[0])
|
|
# if it was originally the same as a note in this deck but the
|
|
# models have diverged, we need to change the guid
|
|
if alreadyHaveGuid:
|
|
guid = guid64()
|
|
self._changedGuids[note[1]] = guid
|
|
note[1] = guid
|
|
# note we have the added note
|
|
self._notes[guid] = (note[0], note[3], note[2])
|
|
else:
|
|
dupes += 1
|
|
## update existing note - not yet tested; for post 2.0
|
|
# newer = note[3] > mod
|
|
# if self.allowUpdate and self._mid(mid) == mid and newer:
|
|
# localNid = self._notes[guid][0]
|
|
# note[0] = localNid
|
|
# note[4] = usn
|
|
# add.append(note)
|
|
# dirty.append(note[0])
|
|
if dupes:
|
|
self.log.append(_("Already in collection: %s.") % (ngettext(
|
|
"%d note", "%d notes", dupes) % dupes))
|
|
# add to col
|
|
self.dst.db.executemany(
|
|
"insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
|
|
add)
|
|
self.dst.updateFieldCache(dirty)
|
|
self.dst.tags.registerNotes(dirty)
|
|
|
|
# Models
|
|
######################################################################
|
|
# Models in the two decks may share an ID but not a schema, so we need to
|
|
# compare the field & template signature rather than just rely on ID. If
|
|
# we created a new model on a conflict then multiple imports would end up
|
|
# with lots of models however, so we store a list of "alternate versions"
|
|
# of a model in the model, so that importing a model is idempotent.
|
|
|
|
def _prepareModels(self):
|
|
"Prepare index of schema hashes."
|
|
self._modelMap = {}
|
|
|
|
def _mid(self, mid):
|
|
"Return local id for remote MID."
|
|
# already processed this mid?
|
|
if mid in self._modelMap:
|
|
return self._modelMap[mid]
|
|
src = self.src.models.get(mid).copy()
|
|
# if it doesn't exist, we'll copy it over, preserving id
|
|
if not self.dst.models.have(mid):
|
|
self.dst.models.update(src)
|
|
# if we're importing with a prefix, make the model default to it
|
|
if self.deckPrefix:
|
|
src['did'] = self.dst.decks.current()['id']
|
|
# and give it a unique name if it's not a shared deck
|
|
if self.deckPrefix != "shared":
|
|
src['name'] += " (%s)" % self.deckPrefix
|
|
# make sure to bump usn
|
|
self.dst.models.save(src)
|
|
self._modelMap[mid] = mid
|
|
return mid
|
|
# if it does exist, do the schema match?
|
|
dst = self.dst.models.get(mid)
|
|
shash = self.src.models.scmhash(src)
|
|
dhash = self.src.models.scmhash(dst)
|
|
if shash == dhash:
|
|
# reuse without modification
|
|
self._modelMap[mid] = mid
|
|
return mid
|
|
# try any alternative versions
|
|
vers = dst.get("vers")
|
|
for v in vers:
|
|
m = self.dst.models.get(v)
|
|
if self.dst.models.scmhash(m) == shash:
|
|
# valid alternate found; use that
|
|
self._modelMap[mid] = m['id']
|
|
return m['id']
|
|
# need to add a new alternate version, with new id
|
|
self.dst.models.add(src)
|
|
if vers:
|
|
dst['vers'].append(src['id'])
|
|
else:
|
|
dst['vers'] = [src['id']]
|
|
self.dst.models.save(dst)
|
|
return src['id']
|
|
|
|
# Decks
|
|
######################################################################
|
|
|
|
def _did(self, did):
|
|
"Given did in src col, return local id."
|
|
# already converted?
|
|
if did in self._decks:
|
|
return self._decks[did]
|
|
# get the name in src
|
|
g = self.src.decks.get(did)
|
|
name = g['name']
|
|
# if there's a prefix, replace the top level deck
|
|
if self.deckPrefix:
|
|
tmpname = "::".join(name.split("::")[1:])
|
|
name = self.deckPrefix
|
|
if tmpname:
|
|
name += "::" + tmpname
|
|
# create in local
|
|
newid = self.dst.decks.id(name)
|
|
# pull conf over
|
|
if 'conf' in g and g['conf'] != 1:
|
|
self.dst.decks.updateConf(self.src.decks.getConf(g['conf']))
|
|
g2 = self.dst.decks.get(newid)
|
|
g2['conf'] = g['conf']
|
|
self.dst.decks.save(g2)
|
|
# save desc
|
|
deck = self.dst.decks.get(newid)
|
|
deck['desc'] = g['desc']
|
|
self.dst.decks.save(deck)
|
|
# add to deck map and return
|
|
self._decks[did] = newid
|
|
return newid
|
|
|
|
# Cards
|
|
######################################################################
|
|
|
|
def _importCards(self):
|
|
# build map of (guid, ord) -> cid and used id cache
|
|
self._cards = {}
|
|
existing = {}
|
|
for guid, ord, cid in self.dst.db.execute(
|
|
"select f.guid, c.ord, c.id from cards c, notes f "
|
|
"where c.nid = f.id"):
|
|
existing[cid] = True
|
|
self._cards[(guid, ord)] = cid
|
|
# loop through src
|
|
cards = []
|
|
revlog = []
|
|
cnt = 0
|
|
usn = self.dst.usn()
|
|
aheadBy = self.src.sched.today - self.dst.sched.today
|
|
for card in self.src.db.execute(
|
|
"select f.guid, f.mid, c.* from cards c, notes f "
|
|
"where c.nid = f.id"):
|
|
guid = card[0]
|
|
if guid in self._changedGuids:
|
|
guid = self._changedGuids[guid]
|
|
# does the card's note exist in dst col?
|
|
if guid not in self._notes:
|
|
continue
|
|
dnid = self._notes[guid]
|
|
# does the card already exist in the dst col?
|
|
ord = card[5]
|
|
if (guid, ord) in self._cards:
|
|
# fixme: in future, could update if newer mod time
|
|
continue
|
|
# doesn't exist. strip off note info, and save src id for later
|
|
card = list(card[2:])
|
|
scid = card[0]
|
|
# ensure the card id is unique
|
|
while card[0] in existing:
|
|
card[0] += 999
|
|
existing[card[0]] = True
|
|
# update cid, nid, etc
|
|
card[1] = self._notes[guid][0]
|
|
card[2] = self._did(card[2])
|
|
card[4] = intTime()
|
|
card[5] = usn
|
|
# review cards have a due date relative to collection
|
|
if card[7] in (2, 3):
|
|
card[8] -= aheadBy
|
|
# if odid true, convert card from filtered to normal
|
|
if card[15]:
|
|
# odid
|
|
card[15] = 0
|
|
# odue
|
|
card[8] = card[14]
|
|
card[14] = 0
|
|
# queue
|
|
if card[6] == 1: # type
|
|
card[7] = 0
|
|
else:
|
|
card[7] = card[6]
|
|
# type
|
|
if card[6] == 1:
|
|
card[6] = 0
|
|
cards.append(card)
|
|
# we need to import revlog, rewriting card ids and bumping usn
|
|
for rev in self.src.db.execute(
|
|
"select * from revlog where cid = ?", scid):
|
|
rev = list(rev)
|
|
rev[1] = card[0]
|
|
rev[2] = self.dst.usn()
|
|
revlog.append(rev)
|
|
cnt += 1
|
|
# apply
|
|
self.dst.db.executemany("""
|
|
insert or ignore into cards values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", cards)
|
|
self.dst.db.executemany("""
|
|
insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
|
|
self.log.append(ngettext("%d card imported.", "%d cards imported.", cnt) % cnt)
|
|
|
|
# Media
|
|
######################################################################
|
|
|
|
def _importStaticMedia(self):
|
|
# Import any '_foo' prefixed media files regardless of whether
|
|
# they're used on notes or not
|
|
dir = self.src.media.dir()
|
|
if not os.path.exists(dir):
|
|
return
|
|
for fname in os.listdir(dir):
|
|
if fname.startswith("_") and not self.dst.media.have(fname):
|
|
self._writeDstMedia(fname, self._srcMediaData(fname))
|
|
|
|
def _mediaData(self, fname, dir=None):
|
|
if not dir:
|
|
dir = self.src.media.dir()
|
|
path = os.path.join(dir, fname)
|
|
try:
|
|
return open(path, "rb").read()
|
|
except IOError, OSError:
|
|
return
|
|
|
|
def _srcMediaData(self, fname):
|
|
"Data for FNAME in src collection."
|
|
return self._mediaData(fname, self.src.media.dir())
|
|
|
|
def _dstMediaData(self, fname):
|
|
"Data for FNAME in dst collection."
|
|
return self._mediaData(fname, self.dst.media.dir())
|
|
|
|
def _writeDstMedia(self, fname, data):
|
|
path = os.path.join(self.dst.media.dir(), fname)
|
|
open(path, "wb").write(data)
|
|
|
|
def _mungeMedia(self, mid, fields):
|
|
fields = splitFields(fields)
|
|
def repl(match):
|
|
fname = match.group(2)
|
|
srcData = self._srcMediaData(fname)
|
|
dstData = self._dstMediaData(fname)
|
|
if not srcData:
|
|
# file was not in source, ignore
|
|
return match.group(0)
|
|
# if model-local file exists from a previous import, use that
|
|
name, ext = os.path.splitext(fname)
|
|
lname = "%s_%s%s" % (name, mid, ext)
|
|
if self.dst.media.have(lname):
|
|
return match.group(0).replace(fname, lname)
|
|
# if missing or the same, pass unmodified
|
|
elif not dstData or srcData == dstData:
|
|
# need to copy?
|
|
if not dstData:
|
|
self._writeDstMedia(fname, srcData)
|
|
return match.group(0)
|
|
# exists but does not match, so we need to dedupe
|
|
self._writeDstMedia(lname, srcData)
|
|
return match.group(0).replace(fname, lname)
|
|
for i in range(len(fields)):
|
|
fields[i] = self.dst.media.transformNames(fields[i], repl)
|
|
return joinFields(fields)
|
|
|
|
# Post-import cleanup
|
|
######################################################################
|
|
# fixme: we could be handling new card order more elegantly on import
|
|
|
|
def _postImport(self):
|
|
# make sure new position is correct
|
|
self.dst.conf['nextPos'] = self.dst.db.scalar(
|
|
"select max(due)+1 from cards where type = 0") or 0
|
|
self.dst.save()
|