normalize to NFC on all platforms

APFS is normalization-preserving, so file listings return names in whatever
form they were written rather than always in NFD. Assuming we'll receive NFD
from file listings therefore results in spurious changes being detected.

This needs to be ported to 2.0.x as well.
This commit is contained in:
Damien Elmes 2018-02-28 16:22:06 +10:00
parent 3cccae7a1f
commit 88fe45b1bb
2 changed files with 24 additions and 21 deletions

View file

@ -2,7 +2,8 @@
# Copyright: Damien Elmes <anki@ichi2.net> # Copyright: Damien Elmes <anki@ichi2.net>
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import re, os, zipfile, shutil import re, os, zipfile, shutil, unicodedata
from anki.lang import _ from anki.lang import _
from anki.utils import ids2str, splitFields, json, namedtmp from anki.utils import ids2str, splitFields, json, namedtmp
from anki.hooks import runHook from anki.hooks import runHook
@ -303,7 +304,7 @@ class AnkiPackageExporter(AnkiExporter):
z.write(mpath, cStr, zipfile.ZIP_DEFLATED) z.write(mpath, cStr, zipfile.ZIP_DEFLATED)
else: else:
z.write(mpath, cStr, zipfile.ZIP_STORED) z.write(mpath, cStr, zipfile.ZIP_STORED)
media[cStr] = file media[cStr] = unicodedata.normalize("NFC", file)
runHook("exportedMediaFiles", c) runHook("exportedMediaFiles", c)
return media return media

View file

@ -156,7 +156,7 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
if typeHint in typeMap: if typeHint in typeMap:
fname += typeMap[typeHint] fname += typeMap[typeHint]
# make sure we write it in NFC form (on mac will autoconvert to NFD), # make sure we write it in NFC form (pre-APFS Macs will autoconvert to NFD),
# and return an NFC-encoded reference # and return an NFC-encoded reference
fname = unicodedata.normalize("NFC", fname) fname = unicodedata.normalize("NFC", fname)
# remove any dangerous characters # remove any dangerous characters
@ -299,8 +299,7 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
continue continue
nfcFile = unicodedata.normalize("NFC", file) nfcFile = unicodedata.normalize("NFC", file)
# we enforce NFC fs encoding on non-macs; on macs we'll have gotten # we enforce NFC fs encoding on non-macs
# NFD so we use the above variable for comparing references
if not isMac and not local: if not isMac and not local:
if file != nfcFile: if file != nfcFile:
# delete if we already have the NFC form, otherwise rename # delete if we already have the NFC form, otherwise rename
@ -407,7 +406,9 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
self.cache = {} self.cache = {}
for (name, csum, mod) in self.db.execute( for (name, csum, mod) in self.db.execute(
"select fname, csum, mtime from media where csum is not null"): "select fname, csum, mtime from media where csum is not null"):
self.cache[name] = [csum, mod, False] # previous entries may not have been in NFC form
normname = unicodedata.normalize("NFC", name)
self.cache[normname] = [csum, mod, False]
added = [] added = []
removed = [] removed = []
# loop through on-disk files # loop through on-disk files
@ -430,26 +431,30 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
self.col.log("ignoring file over 100MB", f.name) self.col.log("ignoring file over 100MB", f.name)
continue continue
# check encoding # check encoding
normname = unicodedata.normalize("NFC", f.name)
if not isMac: if not isMac:
normf = unicodedata.normalize("NFC", f.name) if f.name != normname:
if f.name != normf:
# wrong filename encoding which will cause sync errors # wrong filename encoding which will cause sync errors
if os.path.exists(normf): if os.path.exists(normname):
os.unlink(f.name) os.unlink(f.name)
else: else:
os.rename(f.name, normf) os.rename(f.name, normname)
else:
# on Macs we can access the file using any normalization
pass
# newly added? # newly added?
mtime = int(f.stat().st_mtime) mtime = int(f.stat().st_mtime)
if f.name not in self.cache: if normname not in self.cache:
added.append((f.name, mtime)) added.append((normname, mtime))
else: else:
# modified since last time? # modified since last time?
if mtime != self.cache[f.name][1]: if mtime != self.cache[normname][1]:
# and has different checksum? # and has different checksum?
if self._checksum(f.name) != self.cache[f.name][0]: if self._checksum(normname) != self.cache[normname][0]:
added.append((f.name, mtime)) added.append((normname, mtime))
# mark as used # mark as used
self.cache[f.name][2] = True self.cache[normname][2] = True
# look for any entries in the cache that no longer exist on disk # look for any entries in the cache that no longer exist on disk
for (k, v) in list(self.cache.items()): for (k, v) in list(self.cache.items()):
if not v[2]: if not v[2]:
@ -551,11 +556,8 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
data = z.read(i) data = z.read(i)
csum = checksum(data) csum = checksum(data)
name = meta[i.filename] name = meta[i.filename]
# normalize name for platform # normalize name
if isMac: name = unicodedata.normalize("NFC", name)
name = unicodedata.normalize("NFD", name)
else:
name = unicodedata.normalize("NFC", name)
# save file # save file
with open(name, "wb") as f: with open(name, "wb") as f:
f.write(data) f.write(data)