diff --git a/anki/deck.py b/anki/deck.py index 7ac485795..505a06050 100644 --- a/anki/deck.py +++ b/anki/deck.py @@ -16,7 +16,7 @@ from anki.lang import _, ngettext from anki.errors import DeckAccessError from anki.stdmodels import BasicModel from anki.utils import parseTags, tidyHTML, genID, ids2str, hexifyID, \ - canonifyTags, joinTags, addTags, checksum + canonifyTags, joinTags, addTags, checksum, fieldChecksum from anki.history import CardHistoryEntry from anki.models import Model, CardModel, formatQA from anki.stats import dailyStats, globalStats, genToday @@ -56,7 +56,7 @@ SEARCH_FIELD = 6 SEARCH_FIELD_EXISTS = 7 SEARCH_QA = 8 SEARCH_PHRASE_WB = 9 -DECK_VERSION = 70 +DECK_VERSION = 71 deckVarsTable = Table( 'deckVars', metadata, @@ -393,6 +393,32 @@ type = (case when type >= 0 then relativeDelay else relativeDelay - 3 end) """) + def updateAllFieldChecksums(self): + # zero out + self.s.statement("update fields set chksum = ''") + # add back for unique fields + for m in self.models: + for fm in m.fieldModels: + self.updateFieldChecksums(fm.id) + + def updateFieldChecksums(self, fmid): + self.s.flush() + self.setSchemaModified() + unique = self.s.scalar( + "select \"unique\" from fieldModels where id = :id", id=fmid) + if unique: + l = [] + for (id, value) in self.s.all( + "select id, value from fields where fieldModelId = :id", + id=fmid): + l.append({'id':id, 'chk':fieldChecksum(value)}) + self.s.statements( + "update fields set chksum = :chk where id = :id", l) + else: + self.s.statement( + "update fields set chksum = '' where fieldModelId=:id", + id=fmid) + def _cardQueue(self, card): return self.cardType(card) @@ -2697,10 +2723,15 @@ select id from facts where spaceUntil like :_ff_%d escape '\\'""" % c for (id, fid, val) in rows if val.find(src) != -1] # update - self.s.statements( - 'update fields set value = :val where id = :id', modded) - self.updateCardQACacheFromIds([f['fid'] for f in modded], + if modded: + self.s.statements( + 'update fields set value = :val where id = :id', modded) + self.updateCardQACacheFromIds([f['fid'] for f in modded], type="facts") + if field: + self.updateFieldChecksums(field) + else: + self.updateAllFieldChecksums() return len(set([f['fid'] for f in modded])) # Find duplicates @@ -2989,7 +3020,8 @@ Return new path, relative to media dir.""" self.modified = newTime or time.time() def setSchemaModified(self): - self.setVar("schemaMod", time.time()) + # we might be called during an upgrade, so avoid bumping modtime + self.setVar("schemaMod", time.time(), mod=False) def flushMod(self): "Mark modified and flush to DB." @@ -3113,10 +3145,10 @@ where id = :id""", fid=f.id, cmid=m.cardModels[0].id, id=id) if quick: num = 4 else: - num = 9 + num = 10 oldSize = os.stat(self.path)[stat.ST_SIZE] self.startProgress(num) - self.updateProgress(_("Checking integrity...")) + self.updateProgress(_("Checking database...")) if self.s.scalar("pragma integrity_check") != "ok": self.finishProgress() return _("Database file is damaged.\n" @@ -3125,7 +3157,7 @@ where id = :id""", fid=f.id, cmid=m.cardModels[0].id, id=id) self.updateProgress() DeckStorage._addIndices(self) # does the user have a model? - self.updateProgress(_("Checking schema...")) + self.updateProgress() if not self.s.scalar("select count(id) from models"): self.addModel(BasicModel()) problems.append(_("Deck was missing a model")) @@ -3209,10 +3241,10 @@ select id from fields where factId not in (select id from facts)""") "update cardModels set allowEmptyAnswer = 1, typeAnswer = '' " "where allowEmptyAnswer is null or typeAnswer is null") # fix tags - self.updateProgress(_("Rebuilding tag cache...")) + self.updateProgress() self.updateCardTags() # make sure ordinals are correct - self.updateProgress(_("Updating ordinals...")) + self.updateProgress() self.s.statement(""" update fields set ordinal = (select ordinal from fieldModels where id = fieldModelId)""") @@ -3220,7 +3252,7 @@ where id = fieldModelId)""") update cards set ordinal = (select ordinal from cardModels where cards.cardModelId = cardModels.id)""") # fix problems with stripping html - self.updateProgress(_("Rebuilding QA cache...")) + self.updateProgress() fields = self.s.all("select id, value from fields") newFields = [] for (id, value) in fields: @@ -3228,11 +3260,14 @@ where cards.cardModelId = cardModels.id)""") self.s.statements( "update fields set value=:value where id=:id", newFields) + # and field checksums + self.updateProgress() + self.updateAllFieldChecksums() # regenerate question/answer cache for m in self.models: self.updateCardsFromModel(m, dirty=False) # rebuild - self.updateProgress(_("Rebuilding types...")) + self.updateProgress() self.rebuildTypes() # since we can ensure the updated version will be propagated to # all locations, we can forget old tombstones @@ -3241,7 +3276,7 @@ where cards.cardModelId = cardModels.id)""") # force a full sync self.setSchemaModified() # and finally, optimize - self.updateProgress(_("Optimizing...")) + self.updateProgress() self.optimize() newSize = os.stat(self.path)[stat.ST_SIZE] save = (oldSize - newSize)/1024 @@ -3521,8 +3556,16 @@ class DeckStorage(object): metadata.create_all(engine) deck = DeckStorage._init(s) else: - # add any possibly new tables if we're upgrading ver = s.scalar("select version from decks limit 1") + # add a checksum to fields + if ver < 71: + try: + s.execute( + "alter table fields add column chksum text "+ + "not null default ''") + except: + pass + # add any possibly new tables if we're upgrading if ver < DECK_VERSION: metadata.create_all(engine) deck = s.query(Deck).get(1) @@ -3695,7 +3738,7 @@ create index if not exists ix_fields_factId on fields (factId)""") deck.s.statement(""" create index if not exists ix_fields_fieldModelId on fields (fieldModelId)""") deck.s.statement(""" -create index if not exists ix_fields_value on fields (value)""") +create index if not exists ix_fields_chksum on fields (chksum)""") # media deck.s.statement(""" create unique index if not exists ix_media_filename on media (filename)""") @@ -3860,9 +3903,17 @@ this message. (ERR-0101)""") % { "revCardsDue", "revCardsRandom", "acqCardsRandom", "acqCardsOld", "acqCardsNew"): deck.s.statement("drop view if exists %s" % v) + deck.version = 70 + deck.s.commit() + if deck.version < 71: + # remove the expensive value cache + deck.s.statement("drop index if exists ix_fields_value") + # add checksums and index + deck.updateAllFieldChecksums() + DeckStorage._addIndices(deck) deck.s.execute("vacuum") deck.s.execute("analyze") - deck.version = 70 + deck.version = 71 deck.s.commit() # executing a pragma here is very slow on large decks, so we store # our own record diff --git a/anki/facts.py b/anki/facts.py index 3d21faf14..6e162d92a 100644 --- a/anki/facts.py +++ b/anki/facts.py @@ -12,7 +12,7 @@ import time from anki.db import * from anki.errors import * from anki.models import Model, FieldModel, fieldModelsTable -from anki.utils import genID, stripHTMLMedia +from anki.utils import genID, stripHTMLMedia, fieldChecksum from anki.hooks import runHook # Fields in a fact @@ -25,7 +25,8 @@ fieldsTable = Table( Column('fieldModelId', Integer, ForeignKey("fieldModels.id"), nullable=False), Column('ordinal', Integer, nullable=False), - Column('value', UnicodeText, nullable=False)) + Column('value', UnicodeText, nullable=False), + Column('chksum', String, nullable=False, default="")) class Field(object): "A field in a fact." @@ -90,9 +91,14 @@ class Fact(object): def __setitem__(self, key, value): try: - [f for f in self.fields if f.name == key][0].value = value + item = [f for f in self.fields if f.name == key][0] except IndexError: raise KeyError + item.value = value + if item.fieldModel.unique: + item.chksum = fieldChecksum(value) + else: + item.chksum = "" def get(self, key, default): try: @@ -121,10 +127,11 @@ class Fact(object): if not field.fieldModel.unique: return True req = ("select value from fields " - "where fieldModelId = :fmid and value = :val") + "where fieldModelId = :fmid and value = :val and chksum = :chk") if field.id: req += " and id != %s" % field.id - return not s.scalar(req, val=field.value, fmid=field.fieldModel.id) + return not s.scalar(req, val=field.value, fmid=field.fieldModel.id, + chk=fieldChecksum(field.value)) def focusLost(self, field): runHook('fact.focusLost', self, field) diff --git a/anki/importing/__init__.py b/anki/importing/__init__.py index d975a3ab7..ad8376f02 100644 --- a/anki/importing/__init__.py +++ b/anki/importing/__init__.py @@ -17,7 +17,7 @@ import time from anki.cards import cardsTable from anki.facts import factsTable, fieldsTable from anki.lang import _ -from anki.utils import genID, canonifyTags +from anki.utils import genID, canonifyTags, fieldChecksum from anki.utils import canonifyTags, ids2str from anki.errors import * from anki.deck import NEW_CARDS_RANDOM @@ -122,11 +122,12 @@ and value != ''""", continue data = [{'fid': fid, 'fmid': fm.id, - 'v': c.fields[index]} + 'v': c.fields[index], + 'chk': self.maybeChecksum(c.fields[index], fm.unique)} for (fid, c) in upcards] self.deck.s.execute(""" -update fields set value = :v where factId = :fid and fieldModelId = :fmid""", - data) +update fields set value = :v, chksum = :chk where factId = :fid +and fieldModelId = :fmid""", data) # update tags self.deck.updateProgress() if tagsIdx is not None: @@ -144,7 +145,7 @@ update fields set value = :v where factId = :fid and fieldModelId = :fmid""", self.deck.updateCardTags(cids) self.deck.updateProgress() self.deck.updateCardsFromFactIds(fids) - self.total = len(fids) + self.total = len(cards) self.deck.setModified() self.deck.finishProgress() @@ -152,6 +153,11 @@ update fields set value = :v where factId = :fid and fieldModelId = :fmid""", "The number of fields." return 0 + def maybeChecksum(self, data, unique): + if not unique: + return "" + return fieldChecksum(data) + def foreignCards(self): "Return a list of foreign cards for importing." assert 0 @@ -254,7 +260,11 @@ where factId in (%s)""" % ",".join([str(s) for s in factIds])) 'ordinal': fm.ordinal, 'id': genID(), 'value': (index is not None and - cards[m].fields[index] or u"")} + cards[m].fields[index] or u""), + 'chksum': self.maybeChecksum( + index is not None and + cards[m].fields[index] or u"", fm.unique) + } for m in range(len(cards))] self.deck.s.execute(fieldsTable.insert(), data) diff --git a/anki/models.py b/anki/models.py index 843835e12..368805615 100644 --- a/anki/models.py +++ b/anki/models.py @@ -43,6 +43,7 @@ fieldModelsTable = Table( # reused as RTL marker Column('features', UnicodeText, nullable=False, default=u""), Column('required', Boolean, nullable=False, default=True), + # if code changes this, it should call deck.updateFieldChecksums() Column('unique', Boolean, nullable=False, default=True), # sqlite keyword Column('numeric', Boolean, nullable=False, default=False), # display diff --git a/anki/sync.py b/anki/sync.py index 09fe4570d..65428a04a 100644 --- a/anki/sync.py +++ b/anki/sync.py @@ -430,7 +430,7 @@ class SyncTools(object): select id, modelId, created, %s, tags, spaceUntil, lastCardId from facts where id in %s""" % (modified, factIds))), 'fields': self.realLists(self.deck.s.all(""" -select id, factId, fieldModelId, ordinal, value from fields +select id, factId, fieldModelId, ordinal, value, chksum from fields where factId in %s""" % factIds)) } @@ -455,12 +455,17 @@ insert or replace into facts values (:id, :modelId, :created, :modified, :tags, :spaceUntil, :lastCardId)""", dlist) # now fields + def chksum(f): + if len(f) > 5: + return f[5] + return self.deck.fieldChecksum(f[4]) dlist = [{ 'id': f[0], 'factId': f[1], 'fieldModelId': f[2], 'ordinal': f[3], - 'value': f[4] + 'value': f[4], + 'chksum': f[5] } for f in fields] # delete local fields since ids may have changed self.deck.s.execute( @@ -469,9 +474,9 @@ values # then update self.deck.s.execute(""" insert into fields -(id, factId, fieldModelId, ordinal, value) +(id, factId, fieldModelId, ordinal, value, chksum) values -(:id, :factId, :fieldModelId, :ordinal, :value)""", dlist) +(:id, :factId, :fieldModelId, :ordinal, :value, :chksum)""", dlist) self.deck.s.statement( "delete from factsDeleted where factId in %s" % ids2str([f[0] for f in facts])) diff --git a/anki/utils.py b/anki/utils.py index 9f960b553..c8752c7af 100644 --- a/anki/utils.py +++ b/anki/utils.py @@ -277,6 +277,12 @@ def deleteTags(tagstr, tags): def checksum(data): return md5(data).hexdigest() +def fieldChecksum(data): + # 8 digit md5 hash of utf8 string, or empty string if empty value + if not data: + return "" + return checksum(data.encode("utf-8"))[:8] + def call(argv, wait=True, **kwargs): try: o = subprocess.Popen(argv, **kwargs) diff --git a/tests/test_deck.py b/tests/test_deck.py index dce679f4a..652d463ca 100644 --- a/tests/test_deck.py +++ b/tests/test_deck.py @@ -129,6 +129,41 @@ def test_factAddDelete(): # and the second should clear the fact deck.deleteCard(id2) +def test_fieldChecksum(): + deck = DeckStorage.Deck() + deck.addModel(BasicModel()) + f = deck.newFact() + f['Front'] = u"new"; f['Back'] = u"new2" + deck.addFact(f) + (id, sum) = deck.s.first( + "select id, chksum from fields where value = 'new'") + assert sum == "22af645d" + # empty field should have no checksum + f['Front'] = u"" + deck.s.flush() + assert deck.s.scalar( + "select chksum from fields where id = :id", id=id) == "" + # changing the value should change the checksum + f['Front'] = u"newx" + deck.s.flush() + assert deck.s.scalar( + "select chksum from fields where id = :id", id=id) == "4b0e5a4c" + # back should have no checksum, because it's not set to be unique + (id, sum) = deck.s.first( + "select id, chksum from fields where value = 'new2'") + assert sum == "" + # if we turn on unique, it should get a checksum + fm = f.model.fieldModels[1] + fm.unique = True + deck.updateFieldChecksums(fm.id) + assert deck.s.scalar( + "select chksum from fields where id = :id", id=id) == "82f2ec5f" + # and turning it off should zero the checksum again + fm.unique = False + deck.updateFieldChecksums(fm.id) + assert deck.s.scalar( + "select chksum from fields where id = :id", id=id) == "" + def test_modelAddDelete(): deck = DeckStorage.Deck() deck.addModel(BasicModel())