fix duplicate check in import

This commit is contained in:
Damien Elmes 2012-05-06 17:29:02 +09:00
parent 362c213a6c
commit a558c47330

View file

@@ -92,6 +92,7 @@ class NoteImporter(Importer):
                 csums[csum].append(id)
             else:
                 csums[csum] = [id]
+        firsts = {}
         fld0idx = self.mapping.index(self.model['flds'][0]['name'])
         self._fmap = self.col.models.fieldMap(self.model)
         self._nextID = timestampID(self.col.db, "notes")
@@ -108,13 +109,16 @@ class NoteImporter(Importer):
                 self.log.append(_("Empty first field: %s") %
                                 " ".join(n.fields))
                 continue
+            # earlier in import?
+            if fld0 in firsts:
+                # duplicates in source file; log and ignore
+                self.log.append(_("Appeared twice in file: %s") %
+                                fld0)
+                continue
+            firsts[fld0] = True
             # already exists?
+            found = False
             if csum in csums:
-                if csums[csum] == -1:
-                    # duplicates in source file; log and ignore
-                    self.log.append(_("Appeared twice in file: %s") %
-                                    fld0)
-                    continue
                 # csum is not a guarantee; have to check
                 for id in csums[csum]:
                     flds = self.col.db.scalar(
@@ -122,20 +126,20 @@ class NoteImporter(Importer):
                     sflds = splitFields(flds)
                     if fld0 == sflds[0]:
                         # duplicate
+                        found = True
                         if self.update:
                             data = self.updateData(n, id, sflds)
                             if data:
                                 updates.append(data)
-                        # note that we've seen this note once already
-                        csums[fieldChecksum(n.fields[0])] = -1
-                        break
+                        found = True
+                        break
             # newly add
-            else:
+            if not found:
                 data = self.newData(n)
                 if data:
                     new.append(data)
                 # note that we've seen this note once already
-                csums[fieldChecksum(n.fields[0])] = -1
+                firsts[fld0] = True
         self.addNew(new)
         self.addUpdates(updates)
         self.col.updateFieldCache(self._ids)