Mirror of https://github.com/ankitects/anki.git, synced 2025-09-24 16:56:36 -04:00
scan media each time
Experiment with ignoring dirMod and scanning the folder for changes on each sync. Use scandir() for better performance. Please let me know if this causes a noticeable slowdown for you.
parent 25cb6d3abe
commit c18bd47a6e
2 changed files with 42 additions and 65 deletions
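
The commit message above relies on os.scandir() being cheaper than os.listdir(): scandir() yields DirEntry objects whose names and file-type information come straight from the directory listing, so checks like entry.is_dir() usually avoid the per-file os.stat()/os.path.isdir() calls an os.listdir() loop needs, and entry.stat() results are cached on the entry after the first lookup. Below is a minimal standalone sketch of that scanning pattern; the function name scan_media_folder and its exact filtering are illustrative only, not part of this commit.

import os

def scan_media_folder(folder):
    """Return a list of (filename, mtime) pairs for regular files in folder.

    Illustrative only: mirrors the scandir()-based loop added in this
    commit, minus the Anki-specific cache/checksum handling.
    """
    entries = []
    with os.scandir(folder) as it:
        for entry in it:
            # skip sub-folders and the Windows thumbnail cache
            if entry.is_dir() or entry.name.lower() == "thumbs.db":
                continue
            st = entry.stat()
            # empty files carry no useful media; skip them here
            if not st.st_size:
                continue
            entries.append((entry.name, int(st.st_mtime)))
    return entries

# usage (hypothetical path):
# print(scan_media_folder("collection.media"))
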
anki/media.py (106 lines changed)
@@ -121,18 +121,6 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
     def dir(self):
         return self._dir
 
-    def _isFAT32(self):
-        if not isWin:
-            return
-        import win32api, win32file
-        try:
-            name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
-        except:
-            # mapped & unmapped network drive; pray that it's not vfat
-            return
-        if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
-            return True
-
     # Adding media
     ##########################################################################
     # opath must be in unicode
@@ -339,8 +327,7 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
 
     def findChanges(self):
         "Scan the media folder if it's changed, and note any changes."
-        if self._changed():
-            self._logChanges()
+        self._logChanges()
 
     def haveDirty(self):
         return self.db.scalar("select 1 from media where dirty=1 limit 1")
@@ -351,27 +338,16 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
     def _checksum(self, path):
         return checksum(open(path, "rb").read())
 
-    def _changed(self):
-        "Return dir mtime if it has changed since the last findChanges()"
-        # doesn't track edits, but user can add or remove a file to update
-        mod = self.db.scalar("select dirMod from meta")
-        mtime = self._mtime(self.dir())
-        if not self._isFAT32() and mod and mod == mtime:
-            return False
-        return mtime
-
     def _logChanges(self):
         (added, removed) = self._changes()
         media = []
-        for f in added:
-            mt = self._mtime(f)
-            media.append((f, self._checksum(f), mt, 1))
+        for f, mtime in added:
+            media.append((f, self._checksum(f), mtime, 1))
         for f in removed:
             media.append((f, None, 0, 1))
         # update media db
         self.db.executemany("insert or replace into media values (?,?,?,?)",
                             media)
-        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
         self.db.commit()
 
     def _changes(self):
@@ -382,43 +358,45 @@ create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);
         added = []
         removed = []
         # loop through on-disk files
-        for f in os.listdir(self.dir()):
-            # ignore folders and thumbs.db
-            if os.path.isdir(f):
-                continue
-            if f.lower() == "thumbs.db":
-                continue
-            # and files with invalid chars
-            if self.hasIllegal(f):
-                continue
-            # empty files are invalid; clean them up and continue
-            sz = os.path.getsize(f)
-            if not sz:
-                os.unlink(f)
-                continue
-            if sz > 100*1024*1024:
-                self.col.log("ignoring file over 100MB", f)
-                continue
-            # check encoding
-            if not isMac:
-                normf = unicodedata.normalize("NFC", f)
-                if f != normf:
-                    # wrong filename encoding which will cause sync errors
-                    if os.path.exists(normf):
-                        os.unlink(f)
-                    else:
-                        os.rename(f, normf)
-            # newly added?
-            if f not in self.cache:
-                added.append(f)
-            else:
-                # modified since last time?
-                if self._mtime(f) != self.cache[f][1]:
-                    # and has different checksum?
-                    if self._checksum(f) != self.cache[f][0]:
-                        added.append(f)
-            # mark as used
-            self.cache[f][2] = True
+        with os.scandir(self.dir()) as it:
+            for f in it:
+                # ignore folders and thumbs.db
+                if f.is_dir():
+                    continue
+                if f.name.lower() == "thumbs.db":
+                    continue
+                # and files with invalid chars
+                if self.hasIllegal(f.name):
+                    continue
+                # empty files are invalid; clean them up and continue
+                sz = f.stat().st_size
+                if not sz:
+                    os.unlink(f.name)
+                    continue
+                if sz > 100*1024*1024:
+                    self.col.log("ignoring file over 100MB", f.name)
+                    continue
+                # check encoding
+                if not isMac:
+                    normf = unicodedata.normalize("NFC", f.name)
+                    if f.name != normf:
+                        # wrong filename encoding which will cause sync errors
+                        if os.path.exists(normf):
+                            os.unlink(f.name)
+                        else:
+                            os.rename(f.name, normf)
+                # newly added?
+                mtime = int(f.stat().st_mtime)
+                if f.name not in self.cache:
+                    added.append((f.name, mtime))
+                else:
+                    # modified since last time?
+                    if mtime != self.cache[f.name][1]:
+                        # and has different checksum?
+                        if self._checksum(f.name) != self.cache[f.name][0]:
+                            added.append((f.name, mtime))
+                # mark as used
+                self.cache[f.name][2] = True
         # look for any entries in the cache that no longer exist on disk
         for (k, v) in list(self.cache.items()):
             if not v[2]:

@@ -69,7 +69,6 @@ def test_deckIntegration():
 
 def test_changes():
     d = getEmptyCol()
-    assert d.media._changed()
     def added():
         return d.media.db.execute("select fname from media where csum is not null")
     def removed():