handle file name conflicts in media on .anki2 import

This commit is contained in:
Damien Elmes 2012-09-21 15:25:52 +09:00
parent fe6147e1dc
commit f13acf8c68
4 changed files with 116 additions and 8 deletions

View file

@ -630,7 +630,6 @@ where c.nid == f.id
ids = self.db.list("""
select id from notes where mid not in """ + ids2str(self.models.ids()))
if ids:
print self.db.list("select distinct mid from notes where id in " + ids2str(ids))
problems.append(
ngettext("Deleted %d note with missing note type.",
"Deleted %d notes with missing note type.", len(ids))

View file

@ -2,8 +2,9 @@
# Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import os
from anki import Collection
from anki.utils import intTime
from anki.utils import intTime, splitFields, joinFields, checksum
from anki.importing.base import Importer
from anki.lang import _
from anki.lang import ngettext
@ -48,7 +49,7 @@ class Anki2Importer(Importer):
self._prepareModels()
self._importNotes()
self._importCards()
self._importMedia()
self._importStaticMedia()
self._postImport()
self.dst.db.execute("vacuum")
self.dst.db.execute("analyze")
@ -86,6 +87,8 @@ class Anki2Importer(Importer):
# rewrite internal ids, models, etc
note[2] = lmid
note[4] = usn
# update media references in case of dupes
note[6] = self._mungeMedia(mid, note[6])
add.append(note)
dirty.append(note[0])
# note we have the added note
@ -265,8 +268,60 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
# Media
######################################################################
def _importMedia(self):
    "Hand the dst media folder path to the src media manager's copyTo()."
    dstDir = self.dst.media.dir()
    self.src.media.copyTo(dstDir)
def _importStaticMedia(self):
    "Copy '_'-prefixed media files from src that dst does not have yet."
    # Import any '_foo' prefixed media files regardless of whether
    # they're used on notes or not
    for fname in os.listdir(self.src.media.dir()):
        if not fname.startswith("_") or self.dst.media.have(fname):
            continue
        data = self._srcMediaData(fname)
        if data is None:
            # the entry could not be read (eg it is a directory, or the
            # file vanished); writing None would raise, so skip it
            continue
        self._writeDstMedia(fname, data)
def _mediaData(self, fname, dir=None):
    """Return the raw contents of media file FNAME, or None if unreadable.

    DIR is the folder to look in; when it is not given, the src
    collection's media folder is used.
    """
    if not dir:
        dir = self.src.media.dir()
    path = os.path.join(dir, fname)
    try:
        # read in binary mode: the data is written back with "wb" and
        # compared for equality, so newline translation must not alter it
        with open(path, "rb") as f:
            return f.read()
    except (IOError, OSError):
        # the tuple is required; "except IOError, OSError" would catch
        # only IOError and bind the exception to the name OSError
        return None
def _srcMediaData(self, fname):
    "Contents of FNAME as read from the src collection's media folder."
    srcDir = self.src.media.dir()
    return self._mediaData(fname, srcDir)
def _dstMediaData(self, fname):
    "Contents of FNAME as read from the dst collection's media folder."
    dstDir = self.dst.media.dir()
    return self._mediaData(fname, dstDir)
def _writeDstMedia(self, fname, data):
    "Write DATA to FNAME in the dst collection's media folder."
    path = os.path.join(self.dst.media.dir(), fname)
    # close the handle explicitly rather than leaving it to the garbage
    # collector, so the bytes are on disk when this method returns
    with open(path, "wb") as f:
        f.write(data)
def _mungeMedia(self, mid, fields):
    """Return FIELDS with media references adjusted for the dst collection.

    Each referenced file that exists in src is copied to dst when dst has
    no data for that name. When dst holds different data under the same
    name, the file is stored as "<name>_<MID><ext>" instead and the
    reference is rewritten to point at that name.
    """
    def relink(m):
        original = m.group(0)
        fname = m.group(2)
        fromSrc = self._srcMediaData(fname)
        inDst = self._dstMediaData(fname)
        if not fromSrc:
            # file was not in source, leave the reference alone
            return original
        base, ext = os.path.splitext(fname)
        localName = "%s_%s%s" % (base, mid, ext)
        # a model-local copy left by a previous import takes priority
        if self.dst.media.have(localName):
            return original.replace(fname, localName)
        # dst has different content under this name, so dedupe
        if inDst and fromSrc != inDst:
            self._writeDstMedia(localName, fromSrc)
            return original.replace(fname, localName)
        # missing or identical: keep the name, copying only if missing
        if not inDst:
            self._writeDstMedia(fname, fromSrc)
        return original
    munged = [self.dst.media.transformNames(field, relink)
              for field in splitFields(fields)]
    return joinFields(munged)
# Post-import cleanup
######################################################################

View file

@ -120,6 +120,11 @@ If the same name exists, compare checksums."""
l.append(fname)
return l
def transformNames(self, txt, func):
    "Apply re.sub with FUNC for every pattern in self.regexps, in order."
    result = txt
    for pattern in self.regexps:
        result = re.sub(pattern, func, result)
    return result
def strip(self, txt):
for reg in self.regexps:
txt = re.sub(reg, "", txt)
@ -206,6 +211,9 @@ If the same name exists, compare checksums."""
cnt += 1
return cnt
def have(self, fname):
    "True if FNAME exists inside the folder returned by self.dir()."
    path = os.path.join(self.dir(), fname)
    return os.path.exists(path)
# Media syncing - changes and removal
##########################################################################

View file

@ -8,7 +8,6 @@ from anki.errors import *
from anki.importing import Anki1Importer, Anki2Importer, TextImporter, \
SupermemoXmlImporter, MnemosyneImporter
from anki.notes import Note
from anki.db import *
testDir = os.path.dirname(__file__)
@ -27,7 +26,7 @@ def test_anki2():
srcCards = src.cardCount()
srcRev = src.db.scalar("select count() from revlog")
# add a media file for testing
open(os.path.join(src.media.dir(), "foo.jpg"), "w").write("foo")
open(os.path.join(src.media.dir(), "_foo.jpg"), "w").write("foo")
src.close()
# create a new empty deck
dst = getEmptyDeck()
@ -53,6 +52,53 @@ def test_anki2():
assert len(os.listdir(dst.media.dir())) == 1
#print dst.path
def test_anki2_mediadupes():
    # check how the .anki2 importer treats a media file whose name is
    # already taken in the destination collection
    def writeMedia(col, fname, data):
        # close the handle explicitly instead of relying on garbage
        # collection to flush the content before the import runs
        with open(os.path.join(col.media.dir(), fname), "w") as f:
            f.write(data)
    tmp = getEmptyDeck()
    # add a note that references a sound
    n = tmp.newNote()
    n['Front'] = "[sound:foo.mp3]"
    mid = n.model()['id']
    tmp.addNote(n)
    # add that sound to media folder
    writeMedia(tmp, "foo.mp3", "foo")
    tmp.close()
    # it should be imported correctly into an empty deck
    empty = getEmptyDeck()
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    assert os.listdir(empty.media.dir()) == ["foo.mp3"]
    # and importing again will not duplicate, as the file content matches
    empty.remCards(empty.db.list("select id from cards"))
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    assert os.listdir(empty.media.dir()) == ["foo.mp3"]
    n = empty.getNote(empty.db.scalar("select id from notes"))
    assert "foo.mp3" in n.fields[0]
    # if the local file content is different, an import should trigger a
    # rename
    empty.remCards(empty.db.list("select id from cards"))
    writeMedia(empty, "foo.mp3", "bar")
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    assert sorted(os.listdir(empty.media.dir())) == [
        "foo.mp3", "foo_%s.mp3" % mid]
    n = empty.getNote(empty.db.scalar("select id from notes"))
    assert "_" in n.fields[0]
    # if the localized media file already exists, we rewrite the note and
    # media
    empty.remCards(empty.db.list("select id from cards"))
    writeMedia(empty, "foo.mp3", "bar")
    imp = Anki2Importer(empty, tmp.path)
    imp.run()
    # no third file may appear: the existing localized copy is reused
    assert sorted(os.listdir(empty.media.dir())) == [
        "foo.mp3", "foo_%s.mp3" % mid]
    n = empty.getNote(empty.db.scalar("select id from notes"))
    assert "_" in n.fields[0]
#print dst.path
def test_anki1():
# get the deck path to import
tmp = getUpgradeDeckPath()
@ -60,7 +106,7 @@ def test_anki1():
mdir = tmp.replace(".anki2", ".media")
if not os.path.exists(mdir):
os.mkdir(mdir)
open(os.path.join(mdir, "foo.jpg"), "w").write("foo")
open(os.path.join(mdir, "_foo.jpg"), "w").write("foo")
# create a new empty deck
dst = getEmptyDeck()
# import src into dst