mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 22:12:21 -04:00
define standard encoding for unicode (#893)
- always store media references in fields in NFC form
- always encode filenames on disk in NFC form on machines other than macs
- use relevant encoding when placing files in the media folder during syncs and apkg imports as well
- rename 'unused media' back to 'check media'
- check media can now automatically change media references and filename encodings to the correct format
This commit is contained in:
parent
4d42282b7b
commit
0d1d8c5bf9
6 changed files with 60 additions and 31 deletions
|
@ -3,6 +3,7 @@
|
||||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import unicodedata
|
||||||
from anki import Collection
|
from anki import Collection
|
||||||
from anki.utils import intTime, splitFields, joinFields, incGuid
|
from anki.utils import intTime, splitFields, joinFields, incGuid
|
||||||
from anki.importing.base import Importer
|
from anki.importing.base import Importer
|
||||||
|
@ -349,7 +350,8 @@ insert or ignore into revlog values (?,?,?,?,?,?,?,?,?)""", revlog)
|
||||||
return self._mediaData(fname, self.dst.media.dir())
|
return self._mediaData(fname, self.dst.media.dir())
|
||||||
|
|
||||||
def _writeDstMedia(self, fname, data):
|
def _writeDstMedia(self, fname, data):
|
||||||
path = os.path.join(self.dst.media.dir(), fname)
|
path = os.path.join(self.dst.media.dir(),
|
||||||
|
unicodedata.normalize("NFC", fname))
|
||||||
try:
|
try:
|
||||||
open(path, "wb").write(data)
|
open(path, "wb").write(data)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||||
|
|
||||||
import zipfile, os
|
import zipfile, os
|
||||||
|
import unicodedata
|
||||||
from anki.utils import tmpfile, json
|
from anki.utils import tmpfile, json
|
||||||
from anki.importing.anki2 import Anki2Importer
|
from anki.importing.anki2 import Anki2Importer
|
||||||
|
|
||||||
|
@ -26,7 +27,8 @@ class AnkiPackageImporter(Anki2Importer):
|
||||||
for file, c in self.nameToNum.items():
|
for file, c in self.nameToNum.items():
|
||||||
if not file.startswith("_") and not file.startswith("latex-"):
|
if not file.startswith("_") and not file.startswith("latex-"):
|
||||||
continue
|
continue
|
||||||
path = os.path.join(self.col.media.dir(), file)
|
path = os.path.join(self.col.media.dir(),
|
||||||
|
unicodedata.normalize("NFC", file))
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
open(path, "wb").write(z.read(c))
|
open(path, "wb").write(z.read(c))
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,7 @@ class MediaManager(object):
|
||||||
|
|
||||||
# Adding media
|
# Adding media
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
# opath must be in unicode
|
||||||
|
|
||||||
def addFile(self, opath):
|
def addFile(self, opath):
|
||||||
return self.writeData(opath, open(opath, "rb").read())
|
return self.writeData(opath, open(opath, "rb").read())
|
||||||
|
@ -90,6 +91,9 @@ class MediaManager(object):
|
||||||
def writeData(self, opath, data):
|
def writeData(self, opath, data):
|
||||||
# if fname is a full path, use only the basename
|
# if fname is a full path, use only the basename
|
||||||
fname = os.path.basename(opath)
|
fname = os.path.basename(opath)
|
||||||
|
# make sure we write it in NFC form (on mac will autoconvert to NFD),
|
||||||
|
# and return an NFC-encoded reference
|
||||||
|
fname = unicodedata.normalize("NFC", fname)
|
||||||
# remove any dangerous characters
|
# remove any dangerous characters
|
||||||
base = self.stripIllegal(fname)
|
base = self.stripIllegal(fname)
|
||||||
(root, ext) = os.path.splitext(base)
|
(root, ext) = os.path.splitext(base)
|
||||||
|
@ -186,15 +190,19 @@ class MediaManager(object):
|
||||||
def check(self, local=None):
|
def check(self, local=None):
|
||||||
"Return (missingFiles, unusedFiles)."
|
"Return (missingFiles, unusedFiles)."
|
||||||
mdir = self.dir()
|
mdir = self.dir()
|
||||||
# generate card q/a and look through all references
|
# gather all media references in NFC form
|
||||||
normrefs = {}
|
allRefs = set()
|
||||||
def norm(s):
|
for nid, mid, flds in self.col.db.execute("select id, mid, flds from notes"):
|
||||||
if isinstance(s, unicode) and isMac:
|
noteRefs = self.filesInStr(mid, flds)
|
||||||
return unicodedata.normalize('NFD', s)
|
# check the refs are in NFC
|
||||||
return s
|
for f in noteRefs:
|
||||||
for f in self.allMedia():
|
# if they're not, we'll need to fix them first
|
||||||
normrefs[norm(f)] = True
|
if f != unicodedata.normalize("NFC", f):
|
||||||
# loop through directory and find unused & missing media
|
self._normalizeNoteRefs(nid)
|
||||||
|
noteRefs = self.filesInStr(mid, flds)
|
||||||
|
break
|
||||||
|
allRefs.update(noteRefs)
|
||||||
|
# loop through media folder
|
||||||
unused = []
|
unused = []
|
||||||
if local is None:
|
if local is None:
|
||||||
files = os.listdir(mdir)
|
files = os.listdir(mdir)
|
||||||
|
@ -202,28 +210,38 @@ class MediaManager(object):
|
||||||
files = local
|
files = local
|
||||||
for file in files:
|
for file in files:
|
||||||
if not local:
|
if not local:
|
||||||
path = os.path.join(mdir, file)
|
if not os.path.isfile(file):
|
||||||
if not os.path.isfile(path):
|
|
||||||
# ignore directories
|
# ignore directories
|
||||||
continue
|
continue
|
||||||
if file.startswith("_"):
|
if file.startswith("_"):
|
||||||
# leading _ says to ignore file
|
# leading _ says to ignore file
|
||||||
continue
|
continue
|
||||||
nfile = norm(file)
|
nfcFile = unicodedata.normalize("NFC", file)
|
||||||
if nfile not in normrefs:
|
# we enforce NFC fs encoding on non-macs; on macs we'll have gotten
|
||||||
|
# NFD so we use the above variable for comparing references
|
||||||
|
if not isMac:
|
||||||
|
if file != nfcFile:
|
||||||
|
# delete if we already have the NFC form, otherwise rename
|
||||||
|
if os.path.exists(nfcFile):
|
||||||
|
os.unlink(file)
|
||||||
|
else:
|
||||||
|
os.rename(file, nfcFile)
|
||||||
|
file = nfcFile
|
||||||
|
# compare
|
||||||
|
if nfcFile not in allRefs:
|
||||||
unused.append(file)
|
unused.append(file)
|
||||||
else:
|
else:
|
||||||
del normrefs[nfile]
|
allRefs.discard(nfcFile)
|
||||||
nohave = [x for x in normrefs.keys() if not x.startswith("_")]
|
nohave = [x for x in allRefs if not x.startswith("_")]
|
||||||
return (nohave, unused)
|
return (nohave, unused)
|
||||||
|
|
||||||
def allMedia(self):
|
def _normalizeNoteRefs(self, nid):
|
||||||
"Return a set of all referenced filenames."
|
note = self.col.getNote(nid)
|
||||||
files = set()
|
for c, fld in enumerate(note.fields):
|
||||||
for mid, flds in self.col.db.execute("select mid, flds from notes"):
|
nfc = unicodedata.normalize("NFC", fld)
|
||||||
for f in self.filesInStr(mid, flds):
|
if nfc != fld:
|
||||||
files.add(f)
|
note.fields[c] = nfc
|
||||||
return files
|
note.flush()
|
||||||
|
|
||||||
# Copying on import
|
# Copying on import
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
@ -276,6 +294,11 @@ class MediaManager(object):
|
||||||
data = z.read(i)
|
data = z.read(i)
|
||||||
csum = checksum(data)
|
csum = checksum(data)
|
||||||
name = meta[i.filename]
|
name = meta[i.filename]
|
||||||
|
# normalize name for platform
|
||||||
|
if isMac:
|
||||||
|
name = unicodedata.normalize("NFD", name)
|
||||||
|
else:
|
||||||
|
name = unicodedata.normalize("NFC", name)
|
||||||
# save file
|
# save file
|
||||||
open(name, "wb").write(data)
|
open(name, "wb").write(data)
|
||||||
# update db
|
# update db
|
||||||
|
@ -327,6 +350,8 @@ class MediaManager(object):
|
||||||
z.writestr("_finished", "")
|
z.writestr("_finished", "")
|
||||||
break
|
break
|
||||||
fname = fname[0]
|
fname = fname[0]
|
||||||
|
# we add it as a one-element array simply to make
|
||||||
|
# the later forgetAdded() call easier
|
||||||
fnames.append([fname])
|
fnames.append([fname])
|
||||||
z.write(fname, str(cnt))
|
z.write(fname, str(cnt))
|
||||||
files[str(cnt)] = fname
|
files[str(cnt)] = fname
|
||||||
|
|
|
@ -151,7 +151,7 @@
|
||||||
</action>
|
</action>
|
||||||
<action name="actionCheckMediaDatabase">
|
<action name="actionCheckMediaDatabase">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>&Unused Media...</string>
|
<string>Check &Media...</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="statusTip">
|
<property name="statusTip">
|
||||||
<string>Check the files in the media directory</string>
|
<string>Check the files in the media directory</string>
|
||||||
|
|
|
@ -18,6 +18,6 @@ def getUpgradeDeckPath(name="anki12.anki"):
|
||||||
src = os.path.join(testDir, "support", name)
|
src = os.path.join(testDir, "support", name)
|
||||||
(fd, dst) = tempfile.mkstemp(suffix=".anki2")
|
(fd, dst) = tempfile.mkstemp(suffix=".anki2")
|
||||||
shutil.copy(src, dst)
|
shutil.copy(src, dst)
|
||||||
return dst
|
return unicode(dst, "utf8")
|
||||||
|
|
||||||
testDir = os.path.dirname(__file__)
|
testDir = os.path.dirname(__file__)
|
||||||
|
|
|
@ -7,7 +7,7 @@ from shared import getEmptyDeck, testDir
|
||||||
def test_add():
|
def test_add():
|
||||||
d = getEmptyDeck()
|
d = getEmptyDeck()
|
||||||
dir = tempfile.mkdtemp(prefix="anki")
|
dir = tempfile.mkdtemp(prefix="anki")
|
||||||
path = os.path.join(dir, "foo.jpg")
|
path = os.path.join(dir, u"foo.jpg")
|
||||||
open(path, "w").write("hello")
|
open(path, "w").write("hello")
|
||||||
# new file, should preserve name
|
# new file, should preserve name
|
||||||
assert d.media.addFile(path) == "foo.jpg"
|
assert d.media.addFile(path) == "foo.jpg"
|
||||||
|
@ -72,7 +72,7 @@ def test_changes():
|
||||||
assert not list(d.media.removed())
|
assert not list(d.media.removed())
|
||||||
# add a file
|
# add a file
|
||||||
dir = tempfile.mkdtemp(prefix="anki")
|
dir = tempfile.mkdtemp(prefix="anki")
|
||||||
path = os.path.join(dir, "foo.jpg")
|
path = os.path.join(dir, u"foo.jpg")
|
||||||
open(path, "w").write("hello")
|
open(path, "w").write("hello")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
path = d.media.addFile(path)
|
path = d.media.addFile(path)
|
||||||
|
|
Loading…
Reference in a new issue