diff --git a/anki/importing/anki2.py b/anki/importing/anki2.py index 4a95440c0..9118238db 100644 --- a/anki/importing/anki2.py +++ b/anki/importing/anki2.py @@ -4,21 +4,14 @@ import os from anki import Collection -from anki.utils import intTime, splitFields, joinFields, checksum, guid64 +from anki.utils import intTime, splitFields, joinFields, checksum, guid64, \ + incGuid from anki.importing.base import Importer from anki.lang import _ from anki.lang import ngettext -# -# Import a .anki2 file into the current collection. Used for migration from -# 1.x, shared decks, and import from a packaged deck. -# -# We can't rely on internal ids, so we: -# - compare notes by guid -# - compare models by schema signature -# - compare cards by note guid + ordinal -# - compare decks by name -# +MID = 2 +GUID = 1 class Anki2Importer(Importer): @@ -56,10 +49,9 @@ class Anki2Importer(Importer): # Notes ###################################################################### - # - should note new for wizard def _importNotes(self): - # build guid -> (id,mod,mid) hash + # build guid -> (id,mod,mid) hash & map of existing note ids self._notes = {} existing = {} for id, guid, mod, mid in self.dst.db.execute( @@ -78,44 +70,20 @@ class Anki2Importer(Importer): "select * from notes"): # turn the db result into a mutable list note = list(note) - guid, mid = note[1:3] - canUseExisting = False - alreadyHaveGuid = False - # do we have the same guid? - if guid in self._notes: - alreadyHaveGuid = True - # and do they share the same model id? - if self._notes[guid][2] == mid: - # and do they share the same schema? - srcM = self.src.models.get(mid) - dstM = self.dst.models.get(self._notes[guid][2]) - if (self.src.models.scmhash(srcM) == - self.src.models.scmhash(dstM)): - # then it's safe to treat as an exact duplicate - canUseExisting = True - # if we can't reuse an existing one, we'll need to add new - if not canUseExisting: - # get corresponding local model - lmid = self._mid(mid) + shouldAdd = self._uniquifyNote(note) + if shouldAdd: # ensure id is unique while note[0] in existing: note[0] += 999 existing[note[0]] = True - # rewrite internal ids, models, etc - note[2] = lmid + # bump usn note[4] = usn # update media references in case of dupes - note[6] = self._mungeMedia(mid, note[6]) + note[6] = self._mungeMedia(note[MID], note[6]) add.append(note) dirty.append(note[0]) - # if it was originally the same as a note in this deck but the - # models have diverged, we need to change the guid - if alreadyHaveGuid: - guid = guid64() - self._changedGuids[note[1]] = guid - note[1] = guid - # note we have the added note - self._notes[guid] = (note[0], note[3], note[2]) + # note we have the added the guid + self._notes[note[GUID]] = (note[0], note[3], note[MID]) else: dupes += 1 ## update existing note - not yet tested; for post 2.0 @@ -136,61 +104,69 @@ class Anki2Importer(Importer): self.dst.updateFieldCache(dirty) self.dst.tags.registerNotes(dirty) + # determine if note is a duplicate, and adjust mid and/or guid as required + # returns true if note should be added + def _uniquifyNote(self, note): + origGuid = note[GUID] + srcMid = note[MID] + dstMid = self._mid(srcMid) + # duplicate schemas? + if srcMid == dstMid: + return origGuid not in self._notes + # differing schemas + note[MID] = dstMid + if origGuid not in self._notes: + return True + # as the schemas differ and we already have a note with a different + # note type, this note needs a new guid + while True: + note[GUID] = incGuid(note[GUID]) + self._changedGuids[origGuid] = note[GUID] + # if we don't have an existing guid, we can add + if note[GUID] not in self._notes: + return True + # if the existing guid shares the same mid, we can reuse + if dstMid == self._notes[note[GUID]][MID]: + return False + # Models ###################################################################### # Models in the two decks may share an ID but not a schema, so we need to # compare the field & template signature rather than just rely on ID. If - # we created a new model on a conflict then multiple imports would end up - # with lots of models however, so we store a list of "alternate versions" - # of a model in the model, so that importing a model is idempotent. + # the schemas don't match, we increment the mid and try again, creating a + # new model if necessary. def _prepareModels(self): "Prepare index of schema hashes." self._modelMap = {} - def _mid(self, mid): + def _mid(self, srcMid): "Return local id for remote MID." # already processed this mid? - if mid in self._modelMap: - return self._modelMap[mid] - src = self.src.models.get(mid).copy() - # if it doesn't exist, we'll copy it over, preserving id - if not self.dst.models.have(mid): - self.dst.models.update(src) - # if we're importing with a prefix, make the model default to it - if self.deckPrefix: - src['did'] = self.dst.decks.current()['id'] - # and give it a unique name if it's not a shared deck - if self.deckPrefix != "shared": - src['name'] += " (%s)" % self.deckPrefix - # make sure to bump usn - self.dst.models.save(src) - self._modelMap[mid] = mid - return mid - # if it does exist, do the schema match? - dst = self.dst.models.get(mid) - shash = self.src.models.scmhash(src) - dhash = self.src.models.scmhash(dst) - if shash == dhash: - # reuse without modification - self._modelMap[mid] = mid - return mid - # try any alternative versions - vers = dst.get("vers") - for v in vers: - m = self.dst.models.get(v) - if self.dst.models.scmhash(m) == shash: - # valid alternate found; use that - self._modelMap[mid] = m['id'] - return m['id'] - # need to add a new alternate version, with new id - self.dst.models.add(src) - if vers: - dst['vers'].append(src['id']) - else: - dst['vers'] = [src['id']] - self.dst.models.save(dst) - return src['id'] + if srcMid in self._modelMap: + return self._modelMap[srcMid] + mid = srcMid + srcModel = self.src.models.get(srcMid) + srcScm = self.src.models.scmhash(srcModel) + while True: + # missing from target col? + if not self.dst.models.have(mid): + # copy it over + model = srcModel.copy() + model['id'] = mid + self.dst.models.update(model) + break + # there's an existing model; do the schemas match? + dstModel = self.dst.models.get(mid) + dstScm = self.dst.models.scmhash(dstModel) + if srcScm == dstScm: + # they do; we can reuse this mid + break + # as they don't match, try next id + mid += 1 + # save map and return new mid + self._modelMap[srcMid] = mid + return mid # Decks ###################################################################### diff --git a/anki/models.py b/anki/models.py index e4044875d..8c129aa4a 100644 --- a/anki/models.py +++ b/anki/models.py @@ -28,7 +28,7 @@ defaultModel = { 'latexPost': "\\end{document}", 'mod': 0, 'usn': 0, - 'vers': [], + 'vers': [], # FIXME: remove when other clients have caught up 'type': MODEL_STD, 'css': """\ .card { diff --git a/anki/upgrade.py b/anki/upgrade.py index 50b5bebc4..7bb1209cf 100644 --- a/anki/upgrade.py +++ b/anki/upgrade.py @@ -345,6 +345,7 @@ insert or replace into col select id, cast(created as int), :t, t = abs(row[0]) if t > 4294967296: t >>= 32 + assert t > 0 m = anki.models.defaultModel.copy() m['id'] = t m['name'] = row[1] diff --git a/anki/utils.py b/anki/utils.py index 524b7515e..d2d32ec07 100644 --- a/anki/utils.py +++ b/anki/utils.py @@ -198,22 +198,36 @@ def maxID(db): # used in ankiweb def base62(num, extra=""): - s = string - table = s.ascii_letters + s.digits + extra + s = string; table = s.ascii_letters + s.digits + extra buf = "" while num: num, i = divmod(num, len(table)) buf = table[i] + buf return buf +_base91_extra_chars = "!#$%&()*+,-./:;<=>?@[]^_`{|}~" def base91(num): # all printable characters minus quotes, backslash and separators - return base62(num, "!#$%&()*+,-./:;<=>?@[]^_`{|}~") + return base62(num, _base91_extra_chars) def guid64(): "Return a base91-encoded 64bit random number." return base91(random.randint(0, 2**64-1)) +# increment a guid by one, for note type conflicts +def incGuid(guid): + return _incGuid(guid[::-1])[::-1] + +def _incGuid(guid): + s = string; table = s.ascii_letters + s.digits + _base91_extra_chars + idx = table.index(guid[0]) + if idx + 1 == len(table): + # overflow + guid = table[0] + _incGuid(guid[1:]) + else: + guid = table[idx+1] + guid[1:] + return guid + # Fields ############################################################################## diff --git a/tests/test_importing.py b/tests/test_importing.py index 8f5134b13..cae350ce4 100644 --- a/tests/test_importing.py +++ b/tests/test_importing.py @@ -157,6 +157,13 @@ def test_anki1_diffmodels(): after = dst.noteCount() # as the model schemas differ, should have been imported as new model assert after == before + 1 + # repeating the process should do nothing + beforeModels = len(dst.models.all()) + imp = Anki1Importer(dst, tmp) + imp.run() + after = dst.noteCount() + assert after == before + 1 + assert beforeModels == len(dst.models.all()) def test_anki2_diffmodels(): # create a new empty deck @@ -179,6 +186,12 @@ def test_anki2_diffmodels(): assert after == before + 1 # and the new model should have both cards assert dst.cardCount() == 3 + # repeating the process should do nothing + imp = AnkiPackageImporter(dst, tmp) + imp.run() + after = dst.noteCount() + assert after == before + 1 + assert dst.cardCount() == 3 def test_csv(): deck = getEmptyDeck()