handle file name conflicts in media on .anki2 import

This commit is contained in:
Damien Elmes 2012-09-21 15:25:52 +09:00
parent fe6147e1dc
commit f13acf8c68
4 changed files with 116 additions and 8 deletions

View file

@ -630,7 +630,6 @@ where c.nid == f.id
ids = self.db.list(""" ids = self.db.list("""
select id from notes where mid not in """ + ids2str(self.models.ids())) select id from notes where mid not in """ + ids2str(self.models.ids()))
if ids: if ids:
print self.db.list("select distinct mid from notes where id in " + ids2str(ids))
problems.append( problems.append(
ngettext("Deleted %d note with missing note type.", ngettext("Deleted %d note with missing note type.",
"Deleted %d notes with missing note type.", len(ids)) "Deleted %d notes with missing note type.", len(ids))

View file

@ -2,8 +2,9 @@
# Copyright: Damien Elmes <anki@ichi2.net> # Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import os
from anki import Collection from anki import Collection
from anki.utils import intTime from anki.utils import intTime, splitFields, joinFields, checksum
from anki.importing.base import Importer from anki.importing.base import Importer
from anki.lang import _ from anki.lang import _
from anki.lang import ngettext from anki.lang import ngettext
@ -48,7 +49,7 @@ class Anki2Importer(Importer):
self._prepareModels() self._prepareModels()
self._importNotes() self._importNotes()
self._importCards() self._importCards()
self._importMedia() self._importStaticMedia()
self._postImport() self._postImport()
self.dst.db.execute("vacuum") self.dst.db.execute("vacuum")
self.dst.db.execute("analyze") self.dst.db.execute("analyze")
@ -86,6 +87,8 @@ class Anki2Importer(Importer):
# rewrite internal ids, models, etc # rewrite internal ids, models, etc
note[2] = lmid note[2] = lmid
note[4] = usn note[4] = usn
# update media references in case of dupes
note[6] = self._mungeMedia(mid, note[6])
add.append(note) add.append(note)
dirty.append(note[0]) dirty.append(note[0])
# note we have the added note # note we have the added note
@ -265,8 +268,60 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
# Media # Media
###################################################################### ######################################################################
def _importStaticMedia(self):
    """Copy '_'-prefixed media files from the source collection.

    Files whose name starts with an underscore are imported whether or
    not any note references them. A file is skipped when the destination
    media folder already has an entry with the same name.
    """
    for fname in os.listdir(self.src.media.dir()):
        if not fname.startswith("_") or self.dst.media.have(fname):
            continue
        data = self._srcMediaData(fname)
        if data is None:
            # _srcMediaData returns None when the entry can't be read
            # (for instance a sub-directory); writing None would raise
            # and leave an empty file behind, so skip it instead.
            continue
        self._writeDstMedia(fname, data)
def _mediaData(self, fname, dir=None):
    """Return the raw contents of media file FNAME, or None if unreadable.

    DIR is the folder to look in; when it is omitted (or empty) the
    source collection's media folder is used.
    """
    if not dir:
        dir = self.src.media.dir()
    path = os.path.join(dir, fname)
    try:
        # Binary mode so the data matches what is later written with "wb";
        # the with-block closes the handle promptly.
        with open(path, "rb") as f:
            return f.read()
    except (IOError, OSError):
        # The tuple is required: `except IOError, OSError` only catches
        # IOError and binds the exception to the name OSError.
        return None
def _srcMediaData(self, fname):
    "Contents of FNAME as found in the source collection's media folder."
    srcDir = self.src.media.dir()
    return self._mediaData(fname, srcDir)
def _dstMediaData(self, fname):
    "Contents of FNAME as found in the destination collection's media folder."
    dstDir = self.dst.media.dir()
    return self._mediaData(fname, dstDir)
def _writeDstMedia(self, fname, data):
    """Write DATA to FNAME in the destination media folder.

    An existing file with the same name is overwritten.
    """
    path = os.path.join(self.dst.media.dir(), fname)
    # Close the handle explicitly so the data is flushed to disk before
    # the file is read back, rather than whenever the object is collected.
    with open(path, "wb") as f:
        f.write(data)
def _mungeMedia(self, mid, fields):
    """Copy media referenced in FIELDS to dst, renaming on conflicts.

    FIELDS is the joined field string of a note being imported and MID
    is the model id used to build a model-local file name
    ("name_MID.ext"). For every media reference found:

    - no data in the source media folder: reference left untouched;
    - "name_MID.ext" already exists in dst: reference rewritten to it;
    - dst has no data for the file: the file is copied, reference kept;
    - dst has identical data: reference kept;
    - dst has different data: the source file is written as
      "name_MID.ext" and the reference is rewritten to that name.

    Returns the joined field string with any rewritten references.
    """
    def repl(match):
        # assumes every media regexp captures the file name as group 2
        fname = match.group(2)
        srcData = self._srcMediaData(fname)
        if not srcData:
            # file was not in source, ignore
            return match.group(0)
        # if model-local file exists from a previous import, use that
        name, ext = os.path.splitext(fname)
        lname = "%s_%s%s" % (name, mid, ext)
        if self.dst.media.have(lname):
            return match.group(0).replace(fname, lname)
        # only touch the destination file once we know we need to compare
        dstData = self._dstMediaData(fname)
        if not dstData:
            # missing in dst: copy it over under its original name
            self._writeDstMedia(fname, srcData)
            return match.group(0)
        if srcData == dstData:
            # same content already present, nothing to do
            return match.group(0)
        # exists but does not match, so we need to dedupe
        self._writeDstMedia(lname, srcData)
        return match.group(0).replace(fname, lname)
    transform = self.dst.media.transformNames
    return joinFields(
        [transform(field, repl) for field in splitFields(fields)])
# Post-import cleanup # Post-import cleanup
###################################################################### ######################################################################

View file

@ -120,6 +120,11 @@ If the same name exists, compare checksums."""
l.append(fname) l.append(fname)
return l return l
def transformNames(self, txt, func):
    "Run FUNC over each media reference in TXT; return the resulting text."
    result = txt
    for pattern in self.regexps:
        # each pattern is applied to the output of the previous one
        result = re.sub(pattern, func, result)
    return result
def strip(self, txt): def strip(self, txt):
for reg in self.regexps: for reg in self.regexps:
txt = re.sub(reg, "", txt) txt = re.sub(reg, "", txt)
@ -206,6 +211,9 @@ If the same name exists, compare checksums."""
cnt += 1 cnt += 1
return cnt return cnt
def have(self, fname):
    "True if an entry named FNAME exists in this media folder."
    path = os.path.join(self.dir(), fname)
    return os.path.exists(path)
# Media syncing - changes and removal # Media syncing - changes and removal
########################################################################## ##########################################################################

View file

@ -8,7 +8,6 @@ from anki.errors import *
from anki.importing import Anki1Importer, Anki2Importer, TextImporter, \ from anki.importing import Anki1Importer, Anki2Importer, TextImporter, \
SupermemoXmlImporter, MnemosyneImporter SupermemoXmlImporter, MnemosyneImporter
from anki.notes import Note from anki.notes import Note
from anki.db import * from anki.db import *
testDir = os.path.dirname(__file__) testDir = os.path.dirname(__file__)
@ -27,7 +26,7 @@ def test_anki2():
srcCards = src.cardCount() srcCards = src.cardCount()
srcRev = src.db.scalar("select count() from revlog") srcRev = src.db.scalar("select count() from revlog")
# add a media file for testing # add a media file for testing
open(os.path.join(src.media.dir(), "foo.jpg"), "w").write("foo") open(os.path.join(src.media.dir(), "_foo.jpg"), "w").write("foo")
src.close() src.close()
# create a new empty deck # create a new empty deck
dst = getEmptyDeck() dst = getEmptyDeck()
@ -53,6 +52,53 @@ def test_anki2():
assert len(os.listdir(dst.media.dir())) == 1 assert len(os.listdir(dst.media.dir())) == 1
#print dst.path #print dst.path
def test_anki2_mediadupes():
    """Importing an .anki2 file must not clobber same-named media in dst.

    Covers: import into an empty deck, re-import with identical media,
    re-import when the local file differs (the imported file is renamed),
    and re-import when the renamed file already exists.
    """
    def writeMedia(col, fname, data):
        # close the handle so the content is on disk before the import runs
        with open(os.path.join(col.media.dir(), fname), "w") as f:
            f.write(data)

    tmp = getEmptyDeck()
    # add a note that references a sound
    n = tmp.newNote()
    n['Front'] = "[sound:foo.mp3]"
    mid = n.model()['id']
    tmp.addNote(n)
    # add that sound to media folder
    writeMedia(tmp, "foo.mp3", "foo")
    tmp.close()
    renamed = "foo_%s.mp3" % mid

    # it should be imported correctly into an empty deck
    empty = getEmptyDeck()
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    assert os.listdir(empty.media.dir()) == ["foo.mp3"]

    # and importing again will not duplicate, as the file content matches
    empty.remCards(empty.db.list("select id from cards"))
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    assert os.listdir(empty.media.dir()) == ["foo.mp3"]
    n = empty.getNote(empty.db.scalar("select id from notes"))
    assert "foo.mp3" in n.fields[0]

    # if the local file content is different, an import should trigger a
    # rename
    empty.remCards(empty.db.list("select id from cards"))
    writeMedia(empty, "foo.mp3", "bar")
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    assert sorted(os.listdir(empty.media.dir())) == ["foo.mp3", renamed]
    n = empty.getNote(empty.db.scalar("select id from notes"))
    # the note must point at the renamed file, not merely contain "_"
    assert renamed in n.fields[0]

    # if the localized media file already exists, we rewrite the note and
    # media
    empty.remCards(empty.db.list("select id from cards"))
    writeMedia(empty, "foo.mp3", "bar")
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    assert sorted(os.listdir(empty.media.dir())) == ["foo.mp3", renamed]
    n = empty.getNote(empty.db.scalar("select id from notes"))
    assert renamed in n.fields[0]
def test_anki1(): def test_anki1():
# get the deck path to import # get the deck path to import
tmp = getUpgradeDeckPath() tmp = getUpgradeDeckPath()
@ -60,7 +106,7 @@ def test_anki1():
mdir = tmp.replace(".anki2", ".media") mdir = tmp.replace(".anki2", ".media")
if not os.path.exists(mdir): if not os.path.exists(mdir):
os.mkdir(mdir) os.mkdir(mdir)
open(os.path.join(mdir, "foo.jpg"), "w").write("foo") open(os.path.join(mdir, "_foo.jpg"), "w").write("foo")
# create a new empty deck # create a new empty deck
dst = getEmptyDeck() dst = getEmptyDeck()
# import src into dst # import src into dst