implement field cache, fix unit tests, remove some importers

the field cache (fsums table) also needs to store the model id to preserve the old behaviour of limiting duplicate checks to a given model, and to ensure we're actually comparing against the same fields.

removed the dingsbums and wcu importers; will accept them back if the authors port them to the new codebase.
2025-09-24 16:56:36 -04:00 · 2011-03-11 01:03:14 +09:00 · 2011-03-11 01:03:14 +09:00 · 4becd8399c
commit 4becd8399c
parent 1078285f0f
8 changed files with 69 additions and 321 deletions
--- a/anki/deck.py
+++ b/anki/deck.py
@@ -475,7 +475,7 @@ due > :now and due < :now""", now=time.time())
        if isRandom:
            due = random.randrange(0, 10000)
        # flush the fact so we get its id
-        fact.flush(cache=False)
+        fact.flush()
        for template in cms:
            print "fixme:specify group on fact add"
            group = self.groupForTemplate(template)
@@ -560,7 +560,7 @@ where fid = :fid and tid = :cmid""",
            return
        strids = ids2str(ids)
        self.db.execute("delete from facts where id in %s" % strids)
-        #self.db.execute("delete from fdata where fid in %s" % strids)
+        self.db.execute("delete from fsums where fid in %s" % strids)

    def _deleteDanglingFacts(self):
        "Delete any facts without cards. Don't call this directly."
@@ -621,17 +621,20 @@ select id from facts where id not in (select distinct fid from cards)""")
            # trash
            sfids = ids2str(
                self.db.list("select fid from cards where id in "+sids))
+            # need to handle delete of fsums/revlog remotely after sync
+            self.db.execute(
+                "update cards set crt = 0, mod = ? where id in "+sids,
+                intTime())
+            self.db.execute(
+                "update facts set crt = 0, mod = ? where id in "+sfids,
+                intTime())
+            self.db.execute("delete from fsums where fid in "+sfids)
            self.db.execute("delete from revlog where cid in "+sids)
-            self.db.execute("update cards set crt = 0 where id in "+sids)
-            self.db.execute("update facts set crt = 0 where id in "+sfids)
-            self.db.execute("delete from fdata where fid in "+sfids)
        self.finishProgress()

    def emptyTrash(self):
        self.db.executescript("""
 delete from facts where id in (select fid from cards where queue = -4);
-delete from fdata where fid in (select fid from cards where queue = -4);
-delete from revlog where cid in (select id from cards where queue = -4);
 delete from cards where queue = -4;""")

    # Models
--- a/anki/facts.py
+++ b/anki/facts.py
@@ -5,7 +5,7 @@
 import time
 from anki.errors import AnkiError
 from anki.utils import stripHTMLMedia, fieldChecksum, intTime, \
-    addTags, deleteTags, parseTags
+    addTags, deleteTags, joinFields, splitFields, ids2str

 class Fact(object):

@@ -22,7 +22,6 @@ class Fact(object):
            self.crt = intTime()
            self.mod = self.crt
            self.tags = ""
-            self.cache = ""
            self._fields = [""] * len(self.model.fields)
            self.data = ""
        self._fmap = self.model.fieldMap()
@@ -38,7 +37,7 @@ select mid, crt, mod, tags, flds, data from facts where id = ?""", self.id)
        self._fields = self._field.split("\x1f")
        self.model = self.deck.getModel(self.mid)

-    def flush(self, cache=True):
+    def flush(self):
        self.mod = intTime()
        # facts table
        sfld = self._fields[self.model.sortField()]
@@ -48,22 +47,22 @@ insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
                            self.mod, self.tags, self.joinedFields(),
                            sfld, self.data)
        self.id = res.lastrowid
+        self.updateFieldChecksums()

    def joinedFields(self):
-        return "\x1f".join(self._fields)
+        return joinFields(self._fields)

-#         # fdata table
-#         self.deck.db.execute("delete from fdata where fid = ?", self.id)
-#         d = []
-#         for (fmid, ord, conf) in self._fmap.values():
-#             val = self._fields[ord]
-#             d.append(dict(fid=self.id, fmid=fmid, ord=ord,
-#                           val=val))
-#         d.append(dict(fid=self.id, fmid=0, ord=-1, val=self.tags))
-#         self.deck.db.executemany("""
-# insert into fdata values (:fid, :fmid, :ord, :val, '')""", d)
-#         # media and caches
-#         self.deck.updateCache([self.id], "fact")
+    def updateFieldChecksums(self):
+        self.deck.db.execute("delete from fsums where fid = ?", self.id)
+        d = []
+        for (ord, conf) in self._fmap.values():
+            if not conf['uniq']:
+                continue
+            val = fieldChecksum(self._fields[ord])
+            if not val:
+                continue
+            d.append((self.id, self.mid, val))
+        self.deck.db.executemany("insert into fsums values (?, ?, ?)", d)

    def cards(self):
        return [self.deck.getCard(id) for id in self.deck.db.list(
@@ -111,14 +110,26 @@ insert or replace into facts values (?, ?, ?, ?, ?, ?, ?, ?)""",
        if not conf['uniq']:
            return True
        val = self[name]
+        if not val:
+            return True
        csum = fieldChecksum(val)
        if self.id:
            lim = "and fid != :fid"
        else:
            lim = ""
-        return not self.deck.db.scalar(
-            "select 1 from fdata where csum = :c %s and val = :v" % lim,
-            c=csum, v=val, fid=self.id)
+        fids = self.deck.db.list(
+            "select fid from fsums where csum = ? and fid != ? and mid = ?",
+            csum, self.id or 0, self.mid)
+        if not fids:
+            return True
+        # grab facts with the same checksums, and see if they're actually
+        # duplicates
+        for flds in self.deck.db.list("select flds from facts where id in "+
+                                      ids2str(fids)):
+            fields = splitFields(flds)
+            if fields[ord] == val:
+                return False
+        return True

    def fieldComplete(self, name, text=None):
        (ord, conf) = self._fmap[name]
--- a/anki/importing/__init__.py
+++ b/anki/importing/__init__.py
@@ -361,15 +361,11 @@ where factId in (%s)""" % ",".join([str(s) for s in factIds]))
 from anki.importing.csvfile import TextImporter
 from anki.importing.anki10 import Anki10Importer
 from anki.importing.mnemosyne10 import Mnemosyne10Importer
-from anki.importing.wcu import WCUImporter
 from anki.importing.supermemo_xml import SupermemoXmlImporter
-from anki.importing.dingsbums import DingsBumsImporter

 Importers = (
    (_("Text separated by tabs or semicolons (*)"), TextImporter),
    (_("Anki Deck (*.anki)"), Anki10Importer),
    (_("Mnemosyne Deck (*.mem)"), Mnemosyne10Importer),
-    (_("CueCard Deck (*.wcu)"), WCUImporter),
    (_("Supermemo XML export (*.xml)"), SupermemoXmlImporter),
-    (_("DingsBums?! Deck (*.dbxml)"), DingsBumsImporter),
    )
--- a/anki/importing/dingsbums.py
+++ b/anki/importing/dingsbums.py
@@ -1,222 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright: rick@vanosten.net
-# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
-
-"""\
-Importing DingsBums?! decks (see dingsbums.vanosten.net)
-========================================================
-
-GENERAL:
-* DingsBums?! files are xml with relational content.
-* DingsBums?!'s data format is more relational than Anki's. Therefore some of the relations are denormalized.
-
-* A stack in DingsBums?! is a deck in Anki
-* An entry type in DingsBums?! is a model in Anki
-* An entry type attribute in DingsBums?! is a field in Anki
-* An entry type attribute item in DingsBums?! does not exist in Anki. It is just the contents of a field denormalized.
-* There is not concept of units and categories in Anki.
-* An entry in DingsBums?! is basically a fact in Anki
-* There are no cards in DingsBums?!
-* There is a special plugin in Anki for Pinyin. Therefore syllable settings from DingsBums?! are ignored.
-* The locale settings in DingsBums?! have never been active and are therefore ignored.
-* All statistics will get lost - i.e. no historic informaiton about progress will be migrated to Anki.
-* The DingsBums?! stack needs to end with *.xml in order to be recognizable in Anki import.
-* The learning levels from DingsBums?! are not taken into account because they do not really match spaced repetition.
-
-DESIGN OF MAPPING FROM DingsBums?! TO Anki
-*
-* The contents of units and categories are transferred as tags to Anki: unit/category label + "_" + unit/category name.
-* If unit/category name has space, then it is replaced by "_"
-* The fields "base", "target", explanation", example", "pronounciation" and "relation" are created as fields in Anki
-* The fields are only created and used in Anki, if they were visible in DingsBums?!, i.e. < 3:
-VISIBILITY_ALWAYS = 0;
-VISIBILITY_QUERY = 1;
-VISIBILITY_SOLUTION = 2;
-VISIBILITY_NEVER = 3;
-
-* The name of the fields in Anki is taken from the labels defined in the stack properties
-* The description field of Anki is not used/displayed. Therefore there is not much sense to transfer the contents of title, author, notes, copyright and license.
-* The visibility options in DingsBums?! are used as hints to make cards in Anki:
-    + Two card templates are made for each model and then applied to each fact when importing.
-    + "Forward": Base -> "Question", target -> "Answer"; if "always", then part of question; if "solution" or "part of query" then part of answer
-    + "Reverse": Target -> "Answer", base -> "Question"
-    + Unit and category are not shown, as they are tags and there is no possibility to distinguish between visibility settings in this case.
-
-CHANGES MADE TO LIBANKI:
-* Added libanki/anki/import/dingsbums.py
-* Added DingsBumsImporter to importers at end of file libanki/anki/import/__init__.py
-* Added libanki/tests/importing/dingsbums.xml
-* Added method test_dingsbums() to libanki/anki/tests/test_importing.py
-"""
-
-from anki.importing import Importer
-from anki import Deck
-from anki.facts import Fact
-from anki.models import Field, Template, Model
-from anki.lang import _
-
-from xml.sax import make_parser
-from xml.sax.handler import ContentHandler
-import sys
-
-class DingsBumsHandler(ContentHandler):
-
-    def __init__(self, deck):
-        self.eid = "0"
-        self.attributeItems = {}
-        self.unitCategories = {}
-        self.attributes = {}
-        self.currentContent = ""
-        self.labels = {}
-        self.labels["pro"] = u"Pronunciation" # the user cannot change this label and therefore not in xml-file
-        self.labels["rel"] = u"Relation"
-        self.visibility = {}
-        self.models = {}
-        self.typeAttributes = {} # mapping of entry type and attribute name (e.g. "ET8_A1", "ET8_A2", ...)
-        self.deck = deck
-        self.f = None # the current fact
-        self.countFacts = 0
-
-    def startElement(self, name, attrs):
-        """Implements SAX interface"""
-        if name in ["etai", "unit", "category"]:
-            self.eid = attrs["eid"]
-        elif "eta" == name:
-            self.attributes[attrs["eid"]] = attrs["n"]
-        elif "entrytype" == name:
-            self.createModel(attrs)
-        elif "e" == name:
-            self.createFact(attrs)
-
-    def endElement(self, name):
-        """Implements SAX interface"""
-        if "vocabulary" == name:
-            self.deck.updateProgress()
-        elif name.endswith("label"):
-            self.labels[name.replace("label", "")] = self.currentContent
-        elif name.startswith("vis"):
-            self.visibility[name.replace("vis", "")] = self.currentContent
-        elif "etai" == name:
-            self.attributeItems[self.eid] = self.currentContent
-        elif "etattributes" == name:
-            self.deck.updateProgress()
-        elif "entrytypes" == name:
-            self.deck.updateProgress()
-        elif "name" == name:
-            self.unitCategories[self.eid] = self.prepareTag(self.currentContent)
-        elif "units" == name:
-            self.deck.updateProgress()
-        elif "categories" == name:
-            self.deck.updateProgress()
-        elif "entries" == name:
-            self.deck.updateProgress()
-        elif "e" == name:
-            self.deck.addFact(self.f)
-            self.countFacts += 1
-        # there is a not logical mapping between the tags for fields and names in VocabInfo
-        # See net.vanosten.dings.consts.Constants.XML_*
-        elif "o" == name:
-            self.f.__setitem__(self.labels["b"], self.currentContent)
-        elif "d" == name:
-            self.f.__setitem__(self.labels["t"], self.currentContent)
-        elif "ep" == name:
-            self.f.__setitem__(self.labels["exp"], self.currentContent)
-        elif "ea" == name:
-            self.f.__setitem__(self.labels["ex"], self.currentContent)
-        elif "p" == name:
-            self.f.__setitem__(self.labels["pro"], self.currentContent)
-        elif "r" == name:
-            self.f.__setitem__(self.labels["rel"], self.currentContent)
-
-    def characters(self, content):
-        """Implements SAX interface"""
-        self.currentContent = content.strip()
-
-    def createModel(self, attrs):
-        """Makes a new Anki (fact) model from an entry type.
-        The card models are made each time from scratch in order that evt. model specific fields (attributes) can make part."""
-        m = Model(attrs["n"])
-        # field model for standard fields
-        m.addFieldModel(FieldModel(self.labels["b"], True, False)) #there is no uniqueness check in DingsBums?!
-        m.addFieldModel(FieldModel(self.labels["t"], True, False))
-        for aField in ["exp", "ex", "pro", "rel"]:
-            if self.visibility[aField] in "012":
-                m.addFieldModel(FieldModel(self.labels[aField], False, False))
-        # field models for attributes
-        for attr in ["a1", "a2" "a3", "a4"]:
-            if attr in attrs.keys():
-                m.addFieldModel(FieldModel(self.attributes[attrs[attr]], False, False))
-                self.typeAttributes[attrs["eid"] + "_" + attr] = self.attributes[attrs[attr]]
-
-        # card model for front
-        frontStrings = ["%(" + self.labels["b"] + ")s"]
-        backStrings = ["%(" + self.labels["t"] + ")s"]
-        for aField in ["exp", "ex", "pro", "rel"]:
-            if self.visibility[aField] in "01":
-                frontStrings.append("%(" + self.labels[aField] + ")s")
-            if self.visibility[aField] in "02":
-                backStrings.append("%(" + self.labels[aField] + ")s")
-        m.addCardModel(CardModel(u'Forward', "<br>".join(frontStrings), "<br>".join(backStrings)))
-        # card model for back
-        m.addCardModel(CardModel(u'Reverse', unicode("%(" + self.labels["t"] + ")s"), unicode("%(" + self.labels["b"] + ")s")))
-        # tags is just the name without spaces
-        m.tags = self.prepareTag(m.name)
-
-        # link
-        self.models[attrs["eid"]] = m
-        self.deck.addModel(m)
-
-    def createFact(self, attrs):
-        """Makes a new Anki fact from an entry."""
-        model = self.models[attrs["et"]]
-        self.f = Fact(model)
-        # process attributes
-        for attr in ["a1", "a2" "a3", "a4"]:
-            if attr in attrs.keys():
-                self.f.__setitem__(self.typeAttributes[attrs["et"] + "_" + attr], self.attributeItems[attrs[attr]])
-        # process tags. Unit, Category plus entry type name
-        tagString = unicode(self.unitCategories[attrs["u"]] + " " + self.unitCategories[attrs["c"]] + " " + model.tags)
-        self.f.tags = tagString
-
-    def prepareTag(self, stringWithSpace):
-        parts = stringWithSpace.split()
-        return "_".join(parts)
-
-class DingsBumsImporter(Importer):
-    needMapper = False # needs to overwrite default in Importer - otherwise Mapping dialog is shown in GUI
-
-    def __init__(self, deck, file):
-        Importer.__init__(self, deck, file)
-        self.deck = deck
-        self.file = file
-        self.total = 0
-
-    def doImport(self):
-        """Totally overrides the method in Importer"""
-        num = 7 # the number of updates to progress bar (see references in method endElement in DingsBumsHandler
-        self.deck.startProgress(num)
-        self.deck.updateProgress(_("Importing..."))
-
-        # parse the DingsBums?! xml file
-        handler = DingsBumsHandler(self.deck)
-        saxparser = make_parser(  )
-        saxparser.setContentHandler(handler)
-        saxparser.parse(self.file)
-        self.total = handler.countFacts
-        self.deck.finishProgress()
-        self.deck.setModified()
-
-if __name__ == '__main__':
-    print "Starting ..."
-
-    # for testing you can start it standalone. Use an argument to specify the file to import
-    filename = str(sys.argv[1])
-
-    mydeck = DeckStorage.Deck()
-    i = DingsBumsImporter(mydeck, filename)
-    i.doImport()
-    assert 7 == i.total
-    mydeck.db.close()
-
-    print "... Finished"
-    sys.exit(1)
--- a/anki/importing/wcu.py
+++ b/anki/importing/wcu.py
@@ -1,51 +0,0 @@
-# -*- coding: utf-8 -*-
-# Author Chris Aakre <caaakre@gmail.com>
-# License: GNU GPL, version 3 or later; http://www.gnu.org/copyleft/gpl.html
-
-import codecs
-from anki.importing import Importer, ForeignCard
-from anki.lang import _
-from anki.errors import *
-
-class WCUImporter(Importer):
-    def __init__(self, *args):
-        Importer.__init__(self, *args)
-        self.lines = None
-        self.numFields=int(2)
-
-    def foreignCards(self):
-        from xml.dom import minidom, Node
-        cards = []
-        f = None
-        try:
-            f = codecs.open(self.file, encoding="utf-8")
-        except:
-            raise ImportFormatError(type="encodingError", info=_("The file was not in UTF8 format."))
-        f.close()
-        def wcuwalk(parent, cards, level=0):
-                for node in parent.childNodes:
-                    if node.nodeType == Node.ELEMENT_NODE:
-                        myCard=ForeignCard()
-                        if node.attributes.has_key("QuestionPicture"):
-                            question = [unicode('<img src="'+node.attributes.get("QuestionPicture").nodeValue+'"><br/>'+node.attributes.get("Question").nodeValue)]
-                        else:
-                            question = [unicode(node.attributes.get("Question").nodeValue)]
-                        if node.attributes.has_key("AnswerPicture"):
-                            answer = [unicode('<img src="'+node.attributes.get("AnswerPicture").nodeValue+'"><br/>'+node.attributes.get("Answer").nodeValue)]
-                        else:
-                            answer = [unicode(node.attributes.get("Answer").nodeValue)]
-                        myCard.fields.extend(question)
-                        myCard.fields.extend(answer)
-                        cards.append(myCard)
-                        wcuwalk(node, cards, level+1)
-
-        def importwcu(file):
-            wcuwalk(minidom.parse(file).documentElement,cards)
-        importwcu(self.file)
-        return cards
-
-    def fields(self):
-        return self.numFields
-
-    def setNumFields(self):
-        self.numFields = int(2)
--- a/anki/storage.py
+++ b/anki/storage.py
@@ -93,6 +93,12 @@ create table if not exists facts (
    data            text not null
 );

+create table if not exists fsums (
+    fid             integer not null,
+    mid             integer not null,
+    csum            integer not null
+);
+
 create table if not exists models (
    id              integer primary key,
    mod             integer not null,
--- a/anki/utils.py
+++ b/anki/utils.py
@@ -256,6 +256,15 @@ def deleteTags(deltags, tags):
            currentTags.remove(r)
    return joinTags(currentTags)

+# Fields
+##############################################################################
+
+def joinFields(list):
+    return "\x1f".join(list)
+
+def splitFields(string):
+    return string.split("\x1f")
+
 # Misc
 ##############################################################################

--- a/tests/test_deck.py
+++ b/tests/test_deck.py
@@ -88,34 +88,30 @@ def test_fieldChecksum():
    f['Front'] = u"new"; f['Back'] = u"new2"
    deck.addFact(f)
    assert deck.db.scalar(
-        "select csum from fdata where ord = 0") == "22af645d"
+        "select csum from fsums") == "22af645d"
    # empty field should have no checksum
    f['Front'] = u""
    f.flush()
    assert deck.db.scalar(
-        "select csum from fdata where ord = 0") == ""
+        "select count() from fsums") == 0
    # changing the val should change the checksum
    f['Front'] = u"newx"
    f.flush()
    assert deck.db.scalar(
-        "select csum from fdata where ord = 0") == "4b0e5a4c"
-    # back should have no checksum, because it's not set to be unique
-    assert deck.db.scalar(
-        "select csum from fdata where ord = 1") == ""
-    # if we turn on unique, it should get a checksum
-    f.model.fields[1].conf['unique'] = True
+        "select csum from fsums") == "4b0e5a4c"
+    # turning off unique and modifying the fact should delete the sum
+    f.model.fields[0]['uniq'] = False
    f.model.flush()
-    f.model.updateCache()
-    print deck.db.scalar(
-        "select csum from fdata where ord = 1")
+    f.flush()
    assert deck.db.scalar(
-        "select csum from fdata where ord = 1") == "82f2ec5f"
-    # turning it off doesn't currently zero the checksum for efficiency reasons
-    # f.model.fields[1].conf['unique'] = False
-    # f.model.flush()
-    # f.model.updateCache()
-    # assert deck.db.scalar(
-    #     "select csum from fdata where ord = 1") == ""
+        "select count() from fsums") == 0
+    # and turning on both should ensure two checksums generated
+    f.model.fields[0]['uniq'] = True
+    f.model.fields[1]['uniq'] = True
+    f.model.flush()
+    f.flush()
+    assert deck.db.scalar(
+        "select count() from fsums") == 2

 def test_upgrade():
    import tempfile, shutil