scan media each time

Experiment with ignoring dirMod and scanning the folder for changes on
each sync. Use scandir() for better performance. Please let me know if
this causes a noticeable slowdown for you.
This commit is contained in:
Damien Elmes 2017-08-29 10:52:13 +10:00
parent 25cb6d3abe
commit c18bd47a6e
2 changed files with 42 additions and 65 deletions

View file

@@ -121,18 +121,6 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
def dir(self): def dir(self):
return self._dir return self._dir
def _isFAT32(self):
if not isWin:
return
import win32api, win32file
try:
name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
except:
# mapped & unmapped network drive; pray that it's not vfat
return
if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
return True
# Adding media # Adding media
########################################################################## ##########################################################################
# opath must be in unicode # opath must be in unicode
@@ -339,8 +327,7 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
def findChanges(self): def findChanges(self):
"Scan the media folder if it's changed, and note any changes." "Scan the media folder if it's changed, and note any changes."
if self._changed(): self._logChanges()
self._logChanges()
def haveDirty(self): def haveDirty(self):
return self.db.scalar("select 1 from media where dirty=1 limit 1") return self.db.scalar("select 1 from media where dirty=1 limit 1")
@@ -351,27 +338,16 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
def _checksum(self, path): def _checksum(self, path):
return checksum(open(path, "rb").read()) return checksum(open(path, "rb").read())
def _changed(self):
"Return dir mtime if it has changed since the last findChanges()"
# doesn't track edits, but user can add or remove a file to update
mod = self.db.scalar("select dirMod from meta")
mtime = self._mtime(self.dir())
if not self._isFAT32() and mod and mod == mtime:
return False
return mtime
def _logChanges(self): def _logChanges(self):
(added, removed) = self._changes() (added, removed) = self._changes()
media = [] media = []
for f in added: for f, mtime in added:
mt = self._mtime(f) media.append((f, self._checksum(f), mtime, 1))
media.append((f, self._checksum(f), mt, 1))
for f in removed: for f in removed:
media.append((f, None, 0, 1)) media.append((f, None, 0, 1))
# update media db # update media db
self.db.executemany("insert or replace into media values (?,?,?,?)", self.db.executemany("insert or replace into media values (?,?,?,?)",
media) media)
self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
self.db.commit() self.db.commit()
def _changes(self): def _changes(self):
@@ -382,43 +358,45 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
added = [] added = []
removed = [] removed = []
# loop through on-disk files # loop through on-disk files
for f in os.listdir(self.dir()): with os.scandir(self.dir()) as it:
# ignore folders and thumbs.db for f in it:
if os.path.isdir(f): # ignore folders and thumbs.db
continue if f.is_dir():
if f.lower() == "thumbs.db": continue
continue if f.name.lower() == "thumbs.db":
# and files with invalid chars continue
if self.hasIllegal(f): # and files with invalid chars
continue if self.hasIllegal(f.name):
# empty files are invalid; clean them up and continue continue
sz = os.path.getsize(f) # empty files are invalid; clean them up and continue
if not sz: sz = f.stat().st_size
os.unlink(f) if not sz:
continue os.unlink(f.name)
if sz > 100*1024*1024: continue
self.col.log("ignoring file over 100MB", f) if sz > 100*1024*1024:
continue self.col.log("ignoring file over 100MB", f.name)
# check encoding continue
if not isMac: # check encoding
normf = unicodedata.normalize("NFC", f) if not isMac:
if f != normf: normf = unicodedata.normalize("NFC", f.name)
# wrong filename encoding which will cause sync errors if f.name != normf:
if os.path.exists(normf): # wrong filename encoding which will cause sync errors
os.unlink(f) if os.path.exists(normf):
else: os.unlink(f.name)
os.rename(f, normf) else:
# newly added? os.rename(f.name, normf)
if f not in self.cache: # newly added?
added.append(f) mtime = int(f.stat().st_mtime)
else: if f.name not in self.cache:
# modified since last time? added.append((f.name, mtime))
if self._mtime(f) != self.cache[f][1]: else:
# and has different checksum? # modified since last time?
if self._checksum(f) != self.cache[f][0]: if mtime != self.cache[f.name][1]:
added.append(f) # and has different checksum?
# mark as used if self._checksum(f.name) != self.cache[f.name][0]:
self.cache[f][2] = True added.append((f.name, mtime))
# mark as used
self.cache[f.name][2] = True
# look for any entries in the cache that no longer exist on disk # look for any entries in the cache that no longer exist on disk
for (k, v) in list(self.cache.items()): for (k, v) in list(self.cache.items()):
if not v[2]: if not v[2]:

View file

@@ -69,7 +69,6 @@ def test_deckIntegration():
def test_changes(): def test_changes():
d = getEmptyCol() d = getEmptyCol()
assert d.media._changed()
def added(): def added():
return d.media.db.execute("select fname from media where csum is not null") return d.media.db.execute("select fname from media where csum is not null")
def removed(): def removed():