From 88fe45b1bb0b39c6ebad6acfae48b595481f699d Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Wed, 28 Feb 2018 16:22:06 +1000 Subject: [PATCH] normalize to NFC on all platforms APFS is normalization-preserving, which results in spurious changes being detected if we assume we'll receive NFD from file listings; this needs to be ported to 2.0.x as well --- anki/exporting.py | 5 +++-- anki/media.py | 40 +++++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/anki/exporting.py b/anki/exporting.py index 3df18498e..7867b46e8 100644 --- a/anki/exporting.py +++ b/anki/exporting.py @@ -2,7 +2,8 @@ # Copyright: Damien Elmes # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -import re, os, zipfile, shutil +import re, os, zipfile, shutil, unicodedata + from anki.lang import _ from anki.utils import ids2str, splitFields, json, namedtmp from anki.hooks import runHook @@ -303,7 +304,7 @@ class AnkiPackageExporter(AnkiExporter): z.write(mpath, cStr, zipfile.ZIP_DEFLATED) else: z.write(mpath, cStr, zipfile.ZIP_STORED) - media[cStr] = file + media[cStr] = unicodedata.normalize("NFC", file) runHook("exportedMediaFiles", c) return media diff --git a/anki/media.py b/anki/media.py index 91b124a1f..461a6e8fd 100644 --- a/anki/media.py +++ b/anki/media.py @@ -156,7 +156,7 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); if typeHint in typeMap: fname += typeMap[typeHint] - # make sure we write it in NFC form (on mac will autoconvert to NFD), + # make sure we write it in NFC form (pre-APFS Macs will autoconvert to NFD), # and return an NFC-encoded reference fname = unicodedata.normalize("NFC", fname) # remove any dangerous characters @@ -299,8 +299,7 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); continue nfcFile = unicodedata.normalize("NFC", file) - # we enforce NFC fs encoding on non-macs; on macs we'll have gotten - # NFD so we use the above 
variable for comparing references + # we enforce NFC fs encoding on non-macs if not isMac and not local: if file != nfcFile: # delete if we already have the NFC form, otherwise rename @@ -407,7 +406,9 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); self.cache = {} for (name, csum, mod) in self.db.execute( "select fname, csum, mtime from media where csum is not null"): - self.cache[name] = [csum, mod, False] + # previous entries may not have been in NFC form + normname = unicodedata.normalize("NFC", name) + self.cache[normname] = [csum, mod, False] added = [] removed = [] # loop through on-disk files @@ -430,26 +431,30 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); self.col.log("ignoring file over 100MB", f.name) continue # check encoding + normname = unicodedata.normalize("NFC", f.name) if not isMac: - normf = unicodedata.normalize("NFC", f.name) - if f.name != normf: + if f.name != normname: # wrong filename encoding which will cause sync errors - if os.path.exists(normf): + if os.path.exists(normname): os.unlink(f.name) else: - os.rename(f.name, normf) + os.rename(f.name, normname) + else: + # on Macs we can access the file using any normalization + pass + # newly added? mtime = int(f.stat().st_mtime) - if f.name not in self.cache: - added.append((f.name, mtime)) + if normname not in self.cache: + added.append((normname, mtime)) else: # modified since last time? - if mtime != self.cache[f.name][1]: + if mtime != self.cache[normname][1]: # and has different checksum? 
- if self._checksum(f.name) != self.cache[f.name][0]: - added.append((f.name, mtime)) + if self._checksum(normname) != self.cache[normname][0]: + added.append((normname, mtime)) # mark as used - self.cache[f.name][2] = True + self.cache[normname][2] = True # look for any entries in the cache that no longer exist on disk for (k, v) in list(self.cache.items()): if not v[2]: @@ -551,11 +556,8 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0); data = z.read(i) csum = checksum(data) name = meta[i.filename] - # normalize name for platform - if isMac: - name = unicodedata.normalize("NFD", name) - else: - name = unicodedata.normalize("NFC", name) + # normalize name + name = unicodedata.normalize("NFC", name) # save file with open(name, "wb") as f: f.write(data)