scan media each time

Experiment with ignoring dirMod (the stored modification time of the media
folder) and scanning the folder for changes on every sync. Use os.scandir()
instead of os.listdir() for better performance. Please let me know if this
causes a noticeable slowdown for you.
This commit is contained in:
Damien Elmes 2017-08-29 10:52:13 +10:00
parent 25cb6d3abe
commit c18bd47a6e
2 changed files with 42 additions and 65 deletions

View file

@@ -121,18 +121,6 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
def dir(self): def dir(self):
return self._dir return self._dir
def _isFAT32(self):
if not isWin:
return
import win32api, win32file
try:
name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
except:
# mapped & unmapped network drive; pray that it's not vfat
return
if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
return True
# Adding media # Adding media
########################################################################## ##########################################################################
# opath must be in unicode # opath must be in unicode
@@ -339,7 +327,6 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
def findChanges(self): def findChanges(self):
"Scan the media folder if it's changed, and note any changes." "Scan the media folder if it's changed, and note any changes."
if self._changed():
self._logChanges() self._logChanges()
def haveDirty(self): def haveDirty(self):
@@ -351,27 +338,16 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
def _checksum(self, path): def _checksum(self, path):
return checksum(open(path, "rb").read()) return checksum(open(path, "rb").read())
def _changed(self):
"Return dir mtime if it has changed since the last findChanges()"
# doesn't track edits, but user can add or remove a file to update
mod = self.db.scalar("select dirMod from meta")
mtime = self._mtime(self.dir())
if not self._isFAT32() and mod and mod == mtime:
return False
return mtime
def _logChanges(self): def _logChanges(self):
(added, removed) = self._changes() (added, removed) = self._changes()
media = [] media = []
for f in added: for f, mtime in added:
mt = self._mtime(f) media.append((f, self._checksum(f), mtime, 1))
media.append((f, self._checksum(f), mt, 1))
for f in removed: for f in removed:
media.append((f, None, 0, 1)) media.append((f, None, 0, 1))
# update media db # update media db
self.db.executemany("insert or replace into media values (?,?,?,?)", self.db.executemany("insert or replace into media values (?,?,?,?)",
media) media)
self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
self.db.commit() self.db.commit()
def _changes(self): def _changes(self):
@@ -382,43 +358,45 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
added = [] added = []
removed = [] removed = []
# loop through on-disk files # loop through on-disk files
for f in os.listdir(self.dir()): with os.scandir(self.dir()) as it:
for f in it:
# ignore folders and thumbs.db # ignore folders and thumbs.db
if os.path.isdir(f): if f.is_dir():
continue continue
if f.lower() == "thumbs.db": if f.name.lower() == "thumbs.db":
continue continue
# and files with invalid chars # and files with invalid chars
if self.hasIllegal(f): if self.hasIllegal(f.name):
continue continue
# empty files are invalid; clean them up and continue # empty files are invalid; clean them up and continue
sz = os.path.getsize(f) sz = f.stat().st_size
if not sz: if not sz:
os.unlink(f) os.unlink(f.name)
continue continue
if sz > 100*1024*1024: if sz > 100*1024*1024:
self.col.log("ignoring file over 100MB", f) self.col.log("ignoring file over 100MB", f.name)
continue continue
# check encoding # check encoding
if not isMac: if not isMac:
normf = unicodedata.normalize("NFC", f) normf = unicodedata.normalize("NFC", f.name)
if f != normf: if f.name != normf:
# wrong filename encoding which will cause sync errors # wrong filename encoding which will cause sync errors
if os.path.exists(normf): if os.path.exists(normf):
os.unlink(f) os.unlink(f.name)
else: else:
os.rename(f, normf) os.rename(f.name, normf)
# newly added? # newly added?
if f not in self.cache: mtime = int(f.stat().st_mtime)
added.append(f) if f.name not in self.cache:
added.append((f.name, mtime))
else: else:
# modified since last time? # modified since last time?
if self._mtime(f) != self.cache[f][1]: if mtime != self.cache[f.name][1]:
# and has different checksum? # and has different checksum?
if self._checksum(f) != self.cache[f][0]: if self._checksum(f.name) != self.cache[f.name][0]:
added.append(f) added.append((f.name, mtime))
# mark as used # mark as used
self.cache[f][2] = True self.cache[f.name][2] = True
# look for any entries in the cache that no longer exist on disk # look for any entries in the cache that no longer exist on disk
for (k, v) in list(self.cache.items()): for (k, v) in list(self.cache.items()):
if not v[2]: if not v[2]:

View file

@@ -69,7 +69,6 @@ def test_deckIntegration():
def test_changes(): def test_changes():
d = getEmptyCol() d = getEmptyCol()
assert d.media._changed()
def added(): def added():
return d.media.db.execute("select fname from media where csum is not null") return d.media.db.execute("select fname from media where csum is not null")
def removed(): def removed():